OpenCL clGetPlatformIDs gives around 230 valgrind memcheck errors

The code:


// g++ -std=c++17 -O0 -g -Wall -Wextra -lOpenCL query.cpp -o query
// valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query

#include <CL/cl.h>
#include <CL/cl.hpp>
#include <vector>
#include <string>
#include <iostream>
#include <memory>

using namespace std;

// Reports a failed OpenCL call on standard output.
//
// `error` is evaluated exactly once; when it differs from CL_SUCCESS the
// file and line of the macro invocation are printed. The do/while(0)
// wrapper makes the macro behave like a single statement, so it can be
// used safely as the body of an unbraced if/else.
#define CL_CHECK_ERROR(error)                                          \
  do {                                                                 \
    if ((error) != CL_SUCCESS) {                                       \
      std::cout << "ERROR in " << __FILE__ << ":" << __LINE__ << "\n"; \
    }                                                                  \
  } while (0)

int main(int, char * argv[])
{
  int type = stoi(argv[1]);

  string info_buffer;
  int info_buffer_size = 1024;
  info_buffer.reserve(info_buffer_size);

  if (type == 0){

    vector<cl_platform_id> platforms;
    cl_uint num_platforms;

    CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
    cout << "num_platforms: " << num_platforms << "
";
    platforms.reserve(num_platforms);
    platforms.resize(num_platforms);
    CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.data(), NULL));
    cout << "- " << platforms.data()[0] << "
";
    // cout << "- " << platforms.data()[1] << "
";
    // cout << platforms.size() << "
";
    for (auto& platform : platforms){
      cout << platform << "
";
      size_t size;
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
      info_buffer.resize(size); // works also with size - 1
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
      cout << "platform: '" << info_buffer << "'
";
    }

  }else if (type == 1){

    cl_uint num_platforms;

    CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
    cout << "num_platforms: " << num_platforms << "
";
    unique_ptr<cl_platform_id[]> platforms(new cl_platform_id[num_platforms]);

    CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.get(), NULL));
    cout << "- " << platforms[0] << "
";
    // cout << "- " << platforms[1] << "
";
    for (uint i=0; i<num_platforms; i++){
      cl_platform_id platform = platforms[i];
      cout << platforms[i] << "
";
      size_t size;
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
      info_buffer.resize(size); // works also with size - 1
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
      cout << "platform: '" << info_buffer << "'
";
    }

  } else {
    vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    cout << "num_platforms: " << platforms.size() << "
";
    cout << "- " << platforms.at(0)() << "
";
    // cout << "- " << platforms[1] << "
";
    for (auto& platform : platforms) {
      CL_CHECK_ERROR(platform.getInfo(CL_PLATFORM_NAME, &info_buffer));
      cout << "platform: " << info_buffer << "
";
    }
  }

  return 0;
}

The compilation:


g++ -std=c++17 -O0 -g -Wall -Wextra -lOpenCL query.cpp -o query

Valgrind:


valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 0 2>query_t0.memcheck

valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 1 2>query_t1.memcheck

valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 2 2>query_t2.memcheck

In t0 and t1 cases (.h):


==8877== LEAK SUMMARY:
==8877==    definitely lost: 2,804 bytes in 19 blocks
==8877==    indirectly lost: 1,444 bytes in 9 blocks
==8877==      possibly lost: 152 bytes in 1 blocks
==8877==    still reachable: 3,056,953 bytes in 1,706 blocks
==8877==                       of which reachable via heuristic:
==8877==                         newarray           : 7,192 bytes in 7 blocks
==8877==         suppressed: 0 bytes in 0 blocks
==8877== Reachable blocks (those to which a pointer was found) are not shown.
==8877== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==8877==
==8877== For counts of detected and suppressed errors, rerun with: -v
==8877== ERROR SUMMARY: 231 errors from 76 contexts (suppressed: 1 from 1)

In t2 (.hpp):


==32316== LEAK SUMMARY:
==32316==    definitely lost: 2,956 bytes in 20 blocks
==32316==    indirectly lost: 1,444 bytes in 9 blocks
==32316==      possibly lost: 0 bytes in 0 blocks
==32316==    still reachable: 3,056,953 bytes in 1,706 blocks
==32316==                       of which reachable via heuristic:
==32316==                         newarray           : 7,192 bytes in 7 blocks
==32316==         suppressed: 0 bytes in 0 blocks
==32316== Reachable blocks (those to which a pointer was found) are not shown.
==32316== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==32316==
==32316== For counts of detected and suppressed errors, rerun with: -v
==32316== ERROR SUMMARY: 230 errors from 75 contexts (suppressed: 1 from 1)

Some errors (from the t2):


==32316== 168 (144 direct, 24 indirect) bytes in 1 blocks are definitely lost in loss record 1,201 of 1,366
==32316==    at 0x4C2CEBF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==32316==    by 0xB4DDF64: Priv_Main_Control_Refresh() (in /usr/lib/libatiadlxx.so)
==32316==    by 0xB4E669F: ADL_Main_Control_Refresh_X() (in /usr/lib/libatiadlxx.so)
==32316==    by 0xB4F3073: ADL2_Main_Control_Refresh (in /usr/lib/libatiadlxx.so)
==32316==    by 0xB4F58E1: ADL2_Main_Control_Create (in /usr/lib/libatiadlxx.so)
==32316==    by 0x7A50470: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7D6F340: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7D3F8C6: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7D3F91B: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7D44C73: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7D2E7C6: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x7A27F26: ??? (in /usr/lib/libamdocl64.so)
==32316==
==32316== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,225 of 1,366
==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==32316==    by 0x63C2DD7: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339563: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==32316==    by 0x10B143: main (query.cpp:69)
==32316==
==32316== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,227 of 1,366
==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==32316==    by 0x63C2F09: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339563: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==32316==    by 0x10B143: main (query.cpp:69)
==32316==
==32316== 512 bytes in 1 blocks are definitely lost in loss record 1,252 of 1,366
==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==32316==    by 0x63EEAA5: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x63F4526: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x63F4B73: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339486: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==32316==    by 0x10B143: main (query.cpp:69)
==32316==
==32316== 1,520 bytes in 10 blocks are definitely lost in loss record 1,290 of 1,366
==32316==    at 0x4C2CEBF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==32316==    by 0xD45BEBD: amdcl::scCompileImpl::Text2Stream(amdcl::_il_string_rec const&, amdcl::_il_binary_rec&) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0xD463218: amdcl::AMDIL::toBinary(char const*, unsigned long*) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0xD4646BD: amdcl::AMDIL::compile(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, amdcl::scCompileBase*) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0xD46306F: amdcl::AMDIL::compile(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0xCF80AB3: AMDILFEToISA(_acl_loader_data_0_8**, char const*, unsigned long) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0xCF828B4: if_aclCompile(_acl_compiler_rec_0_8_1*, _acl_bif_rec_0_8_1*, char const*, _acl_type_enum_0_8, _acl_type_enum_0_8, void (*)(char const*, unsigned long)) (in /usr/lib/libamdocl12cl64.so)
==32316==    by 0x82C35D8: aclCompile (in /usr/lib/libamdocl64.so)
==32316==    by 0x79930E7: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x799375F: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x79A2111: ??? (in /usr/lib/libamdocl64.so)
==32316==    by 0x79A4929: ??? (in /usr/lib/libamdocl64.so)

As you can see, I have tried three different ways to query the platforms (t0, t1 using the C API, t2 using the C++ API). How can I remove the memory leaks in my code? Am I doing something wrong?

Of those 231 errors from 76 contexts, my file appears in just 4 (C) or 3 (C++) messages (compare `grep 'cpp' query_t1.memcheck` with `grep 'lost' query_t1.memcheck`). So how can I remove those memory leaks if they do not originate in my code? Is there anything I can do about them?

Example:


==32316==    by 0x10B143: main (query.cpp:69)
==32316==    by 0x10B143: main (query.cpp:69)
==32316==    by 0x10B143: main (query.cpp:69)

==32316== 8 bytes in 1 blocks are definitely lost in loss record 53 of 1,366
==32316== 8 bytes in 1 blocks are definitely lost in loss record 54 of 1,366
==32316== 8 bytes in 1 blocks are definitely lost in loss record 55 of 1,366
==32316== 8 bytes in 1 blocks are definitely lost in loss record 56 of 1,366
==32316== 20 bytes in 1 blocks are definitely lost in loss record 94 of 1,366
==32316== 168 (144 direct, 24 indirect) bytes in 1 blocks are definitely lost in loss record 1,201 of 1,366
==32316== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,225 of 1,366
==32316== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,227 of 1,366
==32316== 512 bytes in 1 blocks are definitely lost in loss record 1,252 of 1,366
==32316== 1,520 bytes in 10 blocks are definitely lost in loss record 1,290 of 1,366
==32316== 1,584 (240 direct, 1,344 indirect) bytes in 1 blocks are definitely lost in loss record 1,291 of 1,366
==32316==    definitely lost: 2,956 bytes in 20 blocks
==32316==    indirectly lost: 1,444 bytes in 9 blocks
==32316==      possibly lost: 0 bytes in 0 blocks

If this is the AMD driver, AMD has responded that Valgrind is wrong. I guess we have no choice but to believe them.

Thank you. I did the same example with an Intel CPU (Intel Driver) and I got 26 errors:

In all cases (t0, t1 with .h, t2 with .hpp):


==2208== LEAK SUMMARY:
==2208==    definitely lost: 1,072 bytes in 5 blocks
==2208==    indirectly lost: 76 bytes in 2 blocks
==2208==      possibly lost: 368 bytes in 1 blocks
==2208==    still reachable: 60,016 bytes in 209 blocks
==2208==         suppressed: 0 bytes in 0 blocks
==2208== Reachable blocks (those to which a pointer was found) are not shown.
==2208== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==2208==
==2208== For counts of detected and suppressed errors, rerun with: -v
==2208== ERROR SUMMARY: 26 errors from 20 contexts (suppressed: 0 from 0)

Some errors (from the t2):


==2304== Conditional jump or move depends on uninitialised value(s)
==2304==    at 0x7B2B183: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7B0DEA2: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304==  Uninitialised value was created by a heap allocation
==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304== Conditional jump or move depends on uninitialised value(s)
==2304==    at 0x7B0DED7: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304==  Uninitialised value was created by a heap allocation
==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304== Conditional jump or move depends on uninitialised value(s)
==2304==    at 0x7B0C988: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7B0CE6C: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7B61D9F: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x400FB92: _dl_fini (in /usr/lib/ld-2.26.so)
==2304==    by 0x5962487: __run_exit_handlers (in /usr/lib/libc-2.26.so)
==2304==    by 0x59624D9: exit (in /usr/lib/libc-2.26.so)
==2304==    by 0x594BF70: (below main) (in /usr/lib/libc-2.26.so)
==2304==  Uninitialised value was created by a heap allocation
==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304== 8 bytes in 1 blocks are definitely lost in loss record 5 of 171
==2304==    at 0x4C2CEFF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x4E3B7C8: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B143: main (query.cpp:69)
==2304==
==2304== 64 bytes in 1 blocks are definitely lost in loss record 136 of 171
==2304==    at 0x4C2CEFF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x4014015: dl_open_worker (in /usr/lib/ld-2.26.so)
==2304==    by 0x5A5D143: _dl_catch_error (in /usr/lib/libc-2.26.so)
==2304==    by 0x4013319: _dl_open (in /usr/lib/ld-2.26.so)
==2304==    by 0x5F01E85: ??? (in /usr/lib/libdl-2.26.so)
==2304==    by 0x5A5D143: _dl_catch_error (in /usr/lib/libc-2.26.so)
==2304==    by 0x5F02586: ??? (in /usr/lib/libdl-2.26.so)
==2304==    by 0x5F01F21: dlopen (in /usr/lib/libdl-2.26.so)
==2304==    by 0x758EDE1: ??? (in /opt/intel/opencl/libtbbmalloc.so.2)
==2304==    by 0x400F519: call_init.part.0 (in /usr/lib/ld-2.26.so)
==2304==    by 0x400F625: _dl_init (in /usr/lib/ld-2.26.so)
==2304==    by 0x4013AFD: dl_open_worker (in /usr/lib/ld-2.26.so)
==2304==
==2304== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 149 of 171
==2304==    at 0x4C2D56F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x69FE787: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x69750C0: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x6975911: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x698B734: clGetPlatformIDs (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 150 of 171
==2304==    at 0x4C2D56F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2304==    by 0x69FE8B9: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x69750C0: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x6975911: ??? (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x698B734: clGetPlatformIDs (in /opt/intel/opencl/libintelocl.so)
==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)

Do you see anything wrong in my code, such as a memory leak? Am I using the calls correctly?

If you don’t mind, here is a longer example, in case you spot something odd or used incorrectly:


  string info_buffer;
  uint info_buffer_size = 1024;
  info_buffer.reserve(info_buffer_size);
  cl_uint num_platforms;

  CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
  // platforms = unique_ptr<cl_platform_id>(new cl_platform_id[num_platforms]);
  unique_ptr<cl_platform_id[]> platforms(new cl_platform_id[num_platforms]);

  CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.get(), NULL));
  for (uint i=0; i<num_platforms; i++){
    cl_platform_id platform = platforms[i];
    size_t size;
    CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
    if (size > info_buffer_size){
      info_buffer.reserve(size);
    }
    info_buffer.resize(size); // works also with size - 1
    CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
    string_erase_null(info_buffer);
    if (Show::showIfMore(show_info)) {
      cout << "platform: " << info_buffer << "
";
    }

    cl_uint num_devices;
    CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
    unique_ptr<cl_device_id[]> devices(new cl_device_id[num_devices]);
    CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.get(), NULL));
    for (uint j=0; j<num_devices; j++){
      cl_device_id device = devices[j];
      CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size));
      if (size > info_buffer_size){
        info_buffer.reserve(size);
      }
      info_buffer.resize(size); // works also with size - 1
      CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, size, info_buffer.data(), NULL));
      string_erase_null(info_buffer);
      if (Show::showIfMore(show_info)) {
        cout << "  device: " << info_buffer << "
";
      }
      CL_CHECK_ERROR(clReleaseDevice(device));
    }

  }

  // Pick the requested platform and device, clamping out-of-range selections
  // to the last available entry, and print the names of the selected pair.
  if (Show::showIfMore(show_info)) {
    cout << "num platforms: " << num_platforms << "\n";
  }
  // NOTE(review): with num_platforms == 0 the clamp below computes
  // num_platforms - 1 and the later platforms[sel_platform] access is out of
  // bounds; the caller should bail out before reaching this point.
  if (sel_platform >= num_platforms) {
    sel_platform = num_platforms - 1;
    cout << "sel_platform changed to: " << sel_platform << "(to fit number of platforms)\n";
  }

  cl_platform_id platform = platforms[sel_platform];

  // Zero-initialised so a failed size query yields an empty name instead of
  // a resize to an indeterminate length.
  size_t size = 0;
  CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
  // resize() grows the capacity on its own; no separate reserve() needed.
  info_buffer.resize(size);
  CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
  string_erase_null(info_buffer);
  if (Show::showIfLessOrMore(show_info)) {
    cout << "Selected platform: " << info_buffer << "\n";
  }

  cl_uint num_devices = 0;
  CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
  if (Show::showIfMore(show_info)) {
    cout << "num devices in selected platform: " << num_devices << "\n";
  }
  unique_ptr<cl_device_id[]> devices(new cl_device_id[num_devices]());
  if (num_devices > 0){
    CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.get(), NULL));
  }

  // NOTE(review): same hazard as above when num_devices == 0.
  if (sel_device >= num_devices) {
    sel_device = num_devices - 1;
    cout << "sel_device changed to: " << sel_device << " (to fit number of devices)\n";
  }

  cl_device_id device = devices[sel_device];

  CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size));
  info_buffer.resize(size);
  CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, size, info_buffer.data(), NULL));
  string_erase_null(info_buffer);
  if (Show::showIfLessOrMore(show_info)) {
    cout << "Selected device: " << info_buffer << "\n";
  }

  // 2) context
  cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &cl_err);
  CL_CHECK_ERROR(cl_err);


  // buffers
  cl_int buffer_flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;

  cl_mem a_buffer = clCreateBuffer(context, buffer_flags, problem_size * sizeof(cl_uchar4), a_array.data(), &cl_err);
  CL_CHECK_ERROR(cl_err, "a_buffer");

  cl_mem b_buffer = clCreateBuffer(context, buffer_flags, gaussian._filter_total_size * sizeof(cl_float), b_array.data(), &cl_err);
  CL_CHECK_ERROR(cl_err, "b_buffer");

  cl_mem c_buffer = clCreateBuffer(context, buffer_flags, problem_size * sizeof(cl_uchar4), c_array.data(), &cl_err);
  CL_CHECK_ERROR(cl_err, "c_buffer");

  // kernel
  unique_ptr<const char*[]> source_codes(new const char*[1]{kernelstr.data()});
  unique_ptr<const size_t[]> source_lengths(new const size_t[1]{kernelstr.length()});
  cl_program program = clCreateProgramWithSource(context, 1, source_codes.get(), source_lengths.get(), &cl_err);
  CL_CHECK_ERROR(cl_err);
  // cl::Program::Sources sources;
  // sources.push_back({kernelstr.c_str(), kernelstr.length()});

  cl_err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
  // cl::Program program(context, sources);
  // cl_err = program.build({device});
  if (cl_int cl_build_err = cl_err; cl_err != CL_SUCCESS) {
    CL_CHECK_ERROR(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size));
    if (size > info_buffer_size){
      info_buffer.reserve(size);
    }
    info_buffer.resize(size);
    CL_CHECK_ERROR(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, size, info_buffer.data(), NULL));
    string_erase_null(info_buffer);
    if (Show::showIfLessOrMore(show_info)) {
      cout << "Selected platform: " << info_buffer << "
";
    }
    cout << " Error building: " << info_buffer << "
";
    CL_CHECK_ERROR(cl_build_err);
  }

  cl_kernel kernel = clCreateKernel(program, "gaussian_blur", &cl_err);
  CL_CHECK_ERROR(cl_err, "kernel");

  // Bind the six arguments of the "gaussian_blur" kernel by index; each call
  // is followed by an error check labelled with the argument it set.
  // Argument 0: the c_buffer memory object.
  cl_err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &c_buffer);
  CL_CHECK_ERROR(cl_err, "kernel arg c_buffer");

  // Argument 1: the a_buffer memory object.
  cl_err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &a_buffer);
  CL_CHECK_ERROR(cl_err, "kernel arg a_buffer");

  // Argument 2: gaussian._height, passed as sizeof(cl_int) bytes.
  // NOTE(review): assumes the member really is a cl_int-sized integer -- confirm.
  cl_err = clSetKernelArg(kernel, 2, sizeof(cl_int), &gaussian._height);
  CL_CHECK_ERROR(cl_err, "kernel arg height");

  // Argument 3: gaussian._width, same size assumption as the height.
  cl_err = clSetKernelArg(kernel, 3, sizeof(cl_int), &gaussian._width);
  CL_CHECK_ERROR(cl_err, "kernel arg width");

  // Argument 4: the b_buffer memory object.
  cl_err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &b_buffer);
  CL_CHECK_ERROR(cl_err, "kernel arg b_buffer");

  // Argument 5: gaussian._filter_width, same size assumption as the height.
  cl_err = clSetKernelArg(kernel, 5, sizeof(cl_int), &gaussian._filter_width);
  CL_CHECK_ERROR(cl_err, "kernel arg filter width");

  // queue
  // Default properties (0) give an in-order queue without profiling.
  cl_command_queue_properties queue_props = 0;
  cl_command_queue queue = clCreateCommandQueue(context, device, queue_props, &cl_err);
  CL_CHECK_ERROR(cl_err, "CommandQueue queue");

  // write
  // Blocking uploads of the two host arrays into a_buffer and b_buffer.
  CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, a_buffer, CL_TRUE, 0, sizeof(cl_uchar4) * problem_size, a_array.data(), 0, NULL, NULL));

  CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, b_buffer, CL_TRUE, 0, sizeof(cl_float) * gaussian._filter_total_size, b_array.data(), 0, NULL, NULL));

  // One-dimensional launch: problem_size work-items in groups of CL_LWS.
  const size_t gwo = {0};
  const size_t gws = {problem_size};
  const size_t lws = {CL_LWS};
  CL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0, NULL, NULL));

  // read
  // After the kernel has run, c_buffer must be read back into the host
  // array; writing the host array into it would overwrite whatever the
  // kernel stored there. (Presumably c_buffer is the kernel's output, since
  // it is bound as argument 0 of "gaussian_blur" -- confirm.)
  CL_CHECK_ERROR(clEnqueueReadBuffer(queue, c_buffer, CL_TRUE, 0, sizeof(cl_uchar4) * problem_size, c_array.data(), 0, NULL, NULL));

  CL_CHECK_ERROR(clFinish(queue));

  // Release the objects created above in reverse order of creation.
  CL_CHECK_ERROR(clReleaseCommandQueue(queue));
  CL_CHECK_ERROR(clReleaseKernel(kernel));
  CL_CHECK_ERROR(clReleaseProgram(program));
  CL_CHECK_ERROR(clReleaseMemObject(c_buffer));
  CL_CHECK_ERROR(clReleaseMemObject(b_buffer));
  CL_CHECK_ERROR(clReleaseMemObject(a_buffer));
  CL_CHECK_ERROR(clReleaseContext(context));
  // No clReleaseDevice: the device came from clGetDeviceIDs and is a
  // root-level device, whose reference count a release does not change.


I mean regarding memory leaks, of course. I did not paste all the surrounding code, just the OpenCL calls.

AMD CodeXL shows missing object releases if this will put your mind at ease. I never tried it under Linux though.