Results 1 to 4 of 4

Thread: OpenCL clGetPlatformIDs gives around 230 valgrind memcheck errors

  1. #1
    Junior Member
    Join Date
    May 2017
    Posts
    5

    OpenCL clGetPlatformIDs gives around 230 valgrind memcheck errors

    The code:

    Code :
    // g++ -std=c++17 -O0 -g -Wall -Wextra -lOpenCL query.cpp -o query
    // valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query
     
    #include <CL/cl.h>
    #include <CL/cl.hpp>
    #include <exception>
    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>
     
    using namespace std;
     
    // Reports a failed OpenCL call on stdout together with the file and line of
    // the call site. `error` is any expression yielding an OpenCL status code; it
    // is evaluated exactly once and compared against CL_SUCCESS.
    //
    // The do { } while (0) wrapper makes the macro expand to a single statement,
    // so `if (x) CL_CHECK_ERROR(e); else ...` parses as intended instead of the
    // `else` being cut off from (or captured by) the macro's internal `if`.
    #define CL_CHECK_ERROR(error)                                              \
      do {                                                                     \
        if ((error) != CL_SUCCESS) {                                           \
          std::cout << "ERROR in " << __FILE__ << ":" << __LINE__ << "\n";     \
        }                                                                      \
      } while (0)
     
    int main(int, char * argv[])
    {
      int type = stoi(argv[1]);
     
      string info_buffer;
      int info_buffer_size = 1024;
      info_buffer.reserve(info_buffer_size);
     
      if (type == 0){
     
        vector<cl_platform_id> platforms;
        cl_uint num_platforms;
     
        CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
        cout << "num_platforms: " << num_platforms << "\n";
        platforms.reserve(num_platforms);
        platforms.resize(num_platforms);
        CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.data(), NULL));
        cout << "- " << platforms.data()[0] << "\n";
        // cout << "- " << platforms.data()[1] << "\n";
        // cout << platforms.size() << "\n";
        for (auto& platform : platforms){
          cout << platform << "\n";
          size_t size;
          CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
          info_buffer.resize(size); // works also with size - 1
          CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
          cout << "platform: '" << info_buffer << "'\n";
        }
     
      }else if (type == 1){
     
        cl_uint num_platforms;
     
        CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
        cout << "num_platforms: " << num_platforms << "\n";
        unique_ptr<cl_platform_id[]> platforms(new cl_platform_id[num_platforms]);
     
        CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.get(), NULL));
        cout << "- " << platforms[0] << "\n";
        // cout << "- " << platforms[1] << "\n";
        for (uint i=0; i<num_platforms; i++){
          cl_platform_id platform = platforms[i];
          cout << platforms[i] << "\n";
          size_t size;
          CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
          info_buffer.resize(size); // works also with size - 1
          CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
          cout << "platform: '" << info_buffer << "'\n";
        }
     
      } else {
        vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);
        cout << "num_platforms: " << platforms.size() << "\n";
        cout << "- " << platforms.at(0)() << "\n";
        // cout << "- " << platforms[1] << "\n";
        for (auto& platform : platforms) {
          CL_CHECK_ERROR(platform.getInfo(CL_PLATFORM_NAME, &info_buffer));
          cout << "platform: " << info_buffer << "\n";
        }
      }
     
      return 0;
    }


    The compilation:

    Code :
    g++ -std=c++17 -O0 -g -Wall -Wextra -lOpenCL query.cpp -o query

    Valgrind:

    Code :
    valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 0 2>query_t0.memcheck
     
    valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 1 2>query_t1.memcheck
     
    valgrind --leak-check=full --track-origins=yes --tool=memcheck ./query 2 2>query_t2.memcheck

    In t0 and t1 cases (.h):

    Code :
    ==8877== LEAK SUMMARY:
    ==8877==    definitely lost: 2,804 bytes in 19 blocks
    ==8877==    indirectly lost: 1,444 bytes in 9 blocks
    ==8877==      possibly lost: 152 bytes in 1 blocks
    ==8877==    still reachable: 3,056,953 bytes in 1,706 blocks
    ==8877==                       of which reachable via heuristic:
    ==8877==                         newarray           : 7,192 bytes in 7 blocks
    ==8877==         suppressed: 0 bytes in 0 blocks
    ==8877== Reachable blocks (those to which a pointer was found) are not shown.
    ==8877== To see them, rerun with: --leak-check=full --show-leak-kinds=all
    ==8877==
    ==8877== For counts of detected and suppressed errors, rerun with: -v
    ==8877== ERROR SUMMARY: 231 errors from 76 contexts (suppressed: 1 from 1)

    In t2 (.hpp):

    Code :
    ==32316== LEAK SUMMARY:
    ==32316==    definitely lost: 2,956 bytes in 20 blocks
    ==32316==    indirectly lost: 1,444 bytes in 9 blocks
    ==32316==      possibly lost: 0 bytes in 0 blocks
    ==32316==    still reachable: 3,056,953 bytes in 1,706 blocks
    ==32316==                       of which reachable via heuristic:
    ==32316==                         newarray           : 7,192 bytes in 7 blocks
    ==32316==         suppressed: 0 bytes in 0 blocks
    ==32316== Reachable blocks (those to which a pointer was found) are not shown.
    ==32316== To see them, rerun with: --leak-check=full --show-leak-kinds=all
    ==32316==
    ==32316== For counts of detected and suppressed errors, rerun with: -v
    ==32316== ERROR SUMMARY: 230 errors from 75 contexts (suppressed: 1 from 1)

    Some errors (from the t2):

    Code :
    ==32316== 168 (144 direct, 24 indirect) bytes in 1 blocks are definitely lost in loss record 1,201 of 1,366
    ==32316==    at 0x4C2CEBF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==32316==    by 0xB4DDF64: Priv_Main_Control_Refresh() (in /usr/lib/libatiadlxx.so)
    ==32316==    by 0xB4E669F: ADL_Main_Control_Refresh_X() (in /usr/lib/libatiadlxx.so)
    ==32316==    by 0xB4F3073: ADL2_Main_Control_Refresh (in /usr/lib/libatiadlxx.so)
    ==32316==    by 0xB4F58E1: ADL2_Main_Control_Create (in /usr/lib/libatiadlxx.so)
    ==32316==    by 0x7A50470: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7D6F340: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7D3F8C6: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7D3F91B: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7D44C73: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7D2E7C6: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x7A27F26: ??? (in /usr/lib/libamdocl64.so)
    ==32316==
    ==32316== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,225 of 1,366
    ==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==32316==    by 0x63C2DD7: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339563: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==32316==    by 0x10B143: main (query.cpp:69)
    ==32316==
    ==32316== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,227 of 1,366
    ==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==32316==    by 0x63C2F09: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339563: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==32316==    by 0x10B143: main (query.cpp:69)
    ==32316==
    ==32316== 512 bytes in 1 blocks are definitely lost in loss record 1,252 of 1,366
    ==32316==    at 0x4C2D51F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==32316==    by 0x63EEAA5: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x63F4526: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x63F4B73: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339486: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x6339F61: ??? (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x634FD84: clGetPlatformIDs (in /opt/intel/opencl-runtime/lib64/libintelocl.so)
    ==32316==    by 0x4E3EBD1: ??? (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x4E3FE83: clGetPlatformIDs (in /usr/lib/libOpenCL.so.1.0.0)
    ==32316==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==32316==    by 0x10B143: main (query.cpp:69)
    ==32316==
    ==32316== 1,520 bytes in 10 blocks are definitely lost in loss record 1,290 of 1,366
    ==32316==    at 0x4C2CEBF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==32316==    by 0xD45BEBD: amdcl::scCompileImpl::Text2Stream(amdcl::_il_string_rec const&, amdcl::_il_binary_rec&) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0xD463218: amdcl::AMDIL::toBinary(char const*, unsigned long*) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0xD4646BD: amdcl::AMDIL::compile(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, amdcl::scCompileBase*) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0xD46306F: amdcl::AMDIL::compile(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0xCF80AB3: AMDILFEToISA(_acl_loader_data_0_8**, char const*, unsigned long) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0xCF828B4: if_aclCompile(_acl_compiler_rec_0_8_1*, _acl_bif_rec_0_8_1*, char const*, _acl_type_enum_0_8, _acl_type_enum_0_8, void (*)(char const*, unsigned long)) (in /usr/lib/libamdocl12cl64.so)
    ==32316==    by 0x82C35D8: aclCompile (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x79930E7: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x799375F: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x79A2111: ??? (in /usr/lib/libamdocl64.so)
    ==32316==    by 0x79A4929: ??? (in /usr/lib/libamdocl64.so)

    As you can see, I have tried three different ways to query the platforms (t0, t1 using the C API, t2 using the C++ API). How can I remove the memory leaks in my code? Am I doing something wrong?

    Of those 231 errors from 76 contexts, my file appears in just 4 (C) or 3 (C++) messages (`grep 'cpp' query_t1.memcheck` vs `grep 'lost' query_t1.memcheck`). So, how can I remove those memory leaks if they are not leaked by my code? Is there anything I can do about them?

    Example:

    Code :
    ==32316==    by 0x10B143: main (query.cpp:69)
    ==32316==    by 0x10B143: main (query.cpp:69)
    ==32316==    by 0x10B143: main (query.cpp:69)
     
    ==32316== 8 bytes in 1 blocks are definitely lost in loss record 53 of 1,366
    ==32316== 8 bytes in 1 blocks are definitely lost in loss record 54 of 1,366
    ==32316== 8 bytes in 1 blocks are definitely lost in loss record 55 of 1,366
    ==32316== 8 bytes in 1 blocks are definitely lost in loss record 56 of 1,366
    ==32316== 20 bytes in 1 blocks are definitely lost in loss record 94 of 1,366
    ==32316== 168 (144 direct, 24 indirect) bytes in 1 blocks are definitely lost in loss record 1,201 of 1,366
    ==32316== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,225 of 1,366
    ==32316== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 1,227 of 1,366
    ==32316== 512 bytes in 1 blocks are definitely lost in loss record 1,252 of 1,366
    ==32316== 1,520 bytes in 10 blocks are definitely lost in loss record 1,290 of 1,366
    ==32316== 1,584 (240 direct, 1,344 indirect) bytes in 1 blocks are definitely lost in loss record 1,291 of 1,366
    ==32316==    definitely lost: 2,956 bytes in 20 blocks
    ==32316==    indirectly lost: 1,444 bytes in 9 blocks
    ==32316==      possibly lost: 0 bytes in 0 blocks

  2. #2
    Senior Member
    Join Date
    Apr 2015
    Posts
    321
    If this is the AMD driver, they have responded that Valgrind is wrong. We have no choice other than to believe them, I guess.

  3. #3
    Junior Member
    Join Date
    May 2017
    Posts
    5
    Thank you. I did the same example with an Intel CPU (Intel Driver) and I got 26 errors:

    In all cases (t0, t1 with .h, t2 with .hpp):

    Code :
    ==2208== LEAK SUMMARY:
    ==2208==    definitely lost: 1,072 bytes in 5 blocks
    ==2208==    indirectly lost: 76 bytes in 2 blocks
    ==2208==      possibly lost: 368 bytes in 1 blocks
    ==2208==    still reachable: 60,016 bytes in 209 blocks
    ==2208==         suppressed: 0 bytes in 0 blocks
    ==2208== Reachable blocks (those to which a pointer was found) are not shown.
    ==2208== To see them, rerun with: --leak-check=full --show-leak-kinds=all
    ==2208==
    ==2208== For counts of detected and suppressed errors, rerun with: -v
    ==2208== ERROR SUMMARY: 26 errors from 20 contexts (suppressed: 0 from 0)

    Some errors (from the t2):

    Code :
    ==2304== Conditional jump or move depends on uninitialised value(s)
    ==2304==    at 0x7B2B183: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7B0DEA2: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304==  Uninitialised value was created by a heap allocation
    ==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304== Conditional jump or move depends on uninitialised value(s)
    ==2304==    at 0x7B0DED7: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304==  Uninitialised value was created by a heap allocation
    ==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304== Conditional jump or move depends on uninitialised value(s)
    ==2304==    at 0x7B0C988: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7B0CE6C: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7B61D9F: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x400FB92: _dl_fini (in /usr/lib/ld-2.26.so)
    ==2304==    by 0x5962487: __run_exit_handlers (in /usr/lib/libc-2.26.so)
    ==2304==    by 0x59624D9: exit (in /usr/lib/libc-2.26.so)
    ==2304==    by 0x594BF70: (below main) (in /usr/lib/libc-2.26.so)
    ==2304==  Uninitialised value was created by a heap allocation
    ==2304==    at 0x4C2D7FF: operator new(unsigned long, std::nothrow_t const&) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x7B0DE8A: ??? (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x7AF90AA: clIcdGetPlatformIDsKHR (in /opt/intel/opencl/libigdrcl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304== 8 bytes in 1 blocks are definitely lost in loss record 5 of 171
    ==2304==    at 0x4C2CEFF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x4E3B7C8: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B143: main (query.cpp:69)
    ==2304==
    ==2304== 64 bytes in 1 blocks are definitely lost in loss record 136 of 171
    ==2304==    at 0x4C2CEFF: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x4014015: dl_open_worker (in /usr/lib/ld-2.26.so)
    ==2304==    by 0x5A5D143: _dl_catch_error (in /usr/lib/libc-2.26.so)
    ==2304==    by 0x4013319: _dl_open (in /usr/lib/ld-2.26.so)
    ==2304==    by 0x5F01E85: ??? (in /usr/lib/libdl-2.26.so)
    ==2304==    by 0x5A5D143: _dl_catch_error (in /usr/lib/libc-2.26.so)
    ==2304==    by 0x5F02586: ??? (in /usr/lib/libdl-2.26.so)
    ==2304==    by 0x5F01F21: dlopen (in /usr/lib/libdl-2.26.so)
    ==2304==    by 0x758EDE1: ??? (in /opt/intel/opencl/libtbbmalloc.so.2)
    ==2304==    by 0x400F519: call_init.part.0 (in /usr/lib/ld-2.26.so)
    ==2304==    by 0x400F625: _dl_init (in /usr/lib/ld-2.26.so)
    ==2304==    by 0x4013AFD: dl_open_worker (in /usr/lib/ld-2.26.so)
    ==2304==
    ==2304== 278 (240 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 149 of 171
    ==2304==    at 0x4C2D56F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x69FE787: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x69750C0: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x6975911: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x698B734: clGetPlatformIDs (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304== 286 (248 direct, 38 indirect) bytes in 1 blocks are definitely lost in loss record 150 of 171
    ==2304==    at 0x4C2D56F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==2304==    by 0x69FE8B9: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x69750C0: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x6975911: ??? (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x698B734: clGetPlatformIDs (in /opt/intel/opencl/libintelocl.so)
    ==2304==    by 0x65284E3: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x6508F1F: ??? (in /opt/intel/opencl/libIntelOpenCL.so)
    ==2304==    by 0x4E3B77D: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x4E3D6CE: ??? (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x5CF1DBE: __pthread_once_slow (in /usr/lib/libpthread-2.26.so)
    ==2304==    by 0x4E3BD20: clGetPlatformIDs (in /opt/intel/opencl/libOpenCL.so.1)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)
    ==2304==    by 0x10B77C: cl::Platform::get(std::vector<cl::Platform, std::allocator<cl::Platform> >*) (cl.hpp:2338)



    Do you see anything wrong in my code? I mean, any memory leak? Am I using the calls correctly?

    If you don't mind, here is a longer example, in case you spot something odd or not properly used:

    Code :
      string info_buffer;
      uint info_buffer_size = 1024;
      info_buffer.reserve(info_buffer_size);
      cl_uint num_platforms;
     
      CL_CHECK_ERROR(clGetPlatformIDs(0, NULL, &num_platforms));
      // platforms = unique_ptr<cl_platform_id>(new cl_platform_id[num_platforms]);
      unique_ptr<cl_platform_id[]> platforms(new cl_platform_id[num_platforms]);
     
      CL_CHECK_ERROR(clGetPlatformIDs(num_platforms, platforms.get(), NULL));
      for (uint i=0; i<num_platforms; i++){
        cl_platform_id platform = platforms[i];
        size_t size;
        CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
        if (size > info_buffer_size){
          info_buffer.reserve(size);
        }
        info_buffer.resize(size); // works also with size - 1
        CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
        string_erase_null(info_buffer);
        if (Show::showIfMore(show_info)) {
          cout << "platform: " << info_buffer << "\n";
        }
     
        cl_uint num_devices;
        CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
        unique_ptr<cl_device_id[]> devices(new cl_device_id[num_devices]);
        CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.get(), NULL));
        for (uint j=0; j<num_devices; j++){
          cl_device_id device = devices[j];
          CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size));
          if (size > info_buffer_size){
            info_buffer.reserve(size);
          }
          info_buffer.resize(size); // works also with size - 1
          CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, size, info_buffer.data(), NULL));
          string_erase_null(info_buffer);
          if (Show::showIfMore(show_info)) {
            cout << "  device: " << info_buffer << "\n";
          }
          CL_CHECK_ERROR(clReleaseDevice(device));
        }
     
      }
     
      // Picks the platform and device to use: the requested indices `sel_platform`
      // and `sel_device` (declared outside this excerpt) are lowered to the last
      // valid index when they are too large, and the names of the selected platform
      // and device are printed when the Show helpers allow it.
      if (Show::showIfMore(show_info)) {
        cout << "num platforms: " << num_platforms << "\n";
      }
      // NOTE(review): when num_platforms is 0 the subtraction below wraps around
      // (cl_uint arithmetic) and the index used afterwards is out of range -
      // confirm that callers guarantee at least one platform.
      if (sel_platform >= num_platforms) {
        sel_platform = num_platforms - 1;
        cout << "sel_platform changed to: " << sel_platform << "(to fit number of platforms)\n";
      }
     
      cl_platform_id platform = platforms[sel_platform];
     
      // Two-step query: first ask for the required size, then fetch the name.
      size_t size;
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size));
      if (size > info_buffer_size){
        info_buffer.reserve(size);
      }
      info_buffer.resize(size); // works also with size - 1
      CL_CHECK_ERROR(clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, info_buffer.data(), NULL));
      // string_erase_null is defined elsewhere; presumably it removes the
      // terminating NUL from the buffer - TODO confirm.
      string_erase_null(info_buffer);
      if (Show::showIfLessOrMore(show_info)) {
        cout << "Selected platform: " << info_buffer << "\n";
      }
     
      // Same count-then-fetch pattern for the devices of the selected platform.
      cl_uint num_devices;
      CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
      if (Show::showIfMore(show_info)) {
        cout << "num devices in selected platform: " << num_devices << "\n";
      }
      unique_ptr<cl_device_id[]> devices(new cl_device_id[num_devices]);
      CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.get(), NULL));
     
      // NOTE(review): same wrap-around hazard as above when num_devices is 0.
      if (sel_device >= num_devices) {
        sel_device = num_devices - 1;
        cout << "sel_device changed to: " << sel_device << " (to fit number of devices)\n";
      }
     
      cl_device_id device = devices[sel_device];
     
      // `size` is reused here for the device-name query.
      CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size));
      if (size > info_buffer_size){
        info_buffer.reserve(size);
      }
      info_buffer.resize(size); // works also with size - 1
      CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_NAME, size, info_buffer.data(), NULL));
      string_erase_null(info_buffer);
      if (Show::showIfLessOrMore(show_info)) {
        cout << "Selected device: " << info_buffer << "\n";
      }
     
      // 2) context
      cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &cl_err);
      CL_CHECK_ERROR(cl_err);
     
     
      // buffers
      cl_int buffer_flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
     
      cl_mem a_buffer = clCreateBuffer(context, buffer_flags, problem_size * sizeof(cl_uchar4), a_array.data(), &cl_err);
      CL_CHECK_ERROR(cl_err, "a_buffer");
     
      cl_mem b_buffer = clCreateBuffer(context, buffer_flags, gaussian._filter_total_size * sizeof(cl_float), b_array.data(), &cl_err);
      CL_CHECK_ERROR(cl_err, "b_buffer");
     
      cl_mem c_buffer = clCreateBuffer(context, buffer_flags, problem_size * sizeof(cl_uchar4), c_array.data(), &cl_err);
      CL_CHECK_ERROR(cl_err, "c_buffer");
     
      // kernel
      unique_ptr<const char*[]> source_codes(new const char*[1]{kernelstr.data()});
      unique_ptr<const size_t[]> source_lengths(new const size_t[1]{kernelstr.length()});
      cl_program program = clCreateProgramWithSource(context, 1, source_codes.get(), source_lengths.get(), &cl_err);
      CL_CHECK_ERROR(cl_err);
      // cl::Program::Sources sources;
      // sources.push_back({kernelstr.c_str(), kernelstr.length()});
     
      cl_err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
      // cl::Program program(context, sources);
      // cl_err = program.build({device});
      if (cl_int cl_build_err = cl_err; cl_err != CL_SUCCESS) {
        CL_CHECK_ERROR(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size));
        if (size > info_buffer_size){
          info_buffer.reserve(size);
        }
        info_buffer.resize(size);
        CL_CHECK_ERROR(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, size, info_buffer.data(), NULL));
        string_erase_null(info_buffer);
        if (Show::showIfLessOrMore(show_info)) {
          cout << "Selected platform: " << info_buffer << "\n";
        }
        cout << " Error building: " << info_buffer << "\n";
        CL_CHECK_ERROR(cl_build_err);
      }
     
      cl_kernel kernel = clCreateKernel(program, "gaussian_blur", &cl_err);
      CL_CHECK_ERROR(cl_err, "kernel");
     
      cl_err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &c_buffer);
      CL_CHECK_ERROR(cl_err, "kernel arg c_buffer");
     
      cl_err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &a_buffer);
      CL_CHECK_ERROR(cl_err, "kernel arg a_buffer");
     
      cl_err = clSetKernelArg(kernel, 2, sizeof(cl_int), &gaussian._height);
      CL_CHECK_ERROR(cl_err, "kernel arg height");
     
      cl_err = clSetKernelArg(kernel, 3, sizeof(cl_int), &gaussian._width);
      CL_CHECK_ERROR(cl_err, "kernel arg width");
     
      cl_err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &b_buffer);
      CL_CHECK_ERROR(cl_err, "kernel arg b_buffer");
     
      cl_err = clSetKernelArg(kernel, 5, sizeof(cl_int), &gaussian._filter_width);
      CL_CHECK_ERROR(cl_err, "kernel arg filter width");
     
      // queue
      // Command queue with default properties (0) on the selected device.
      cl_command_queue_properties queue_props = 0;
      cl_command_queue queue = clCreateCommandQueue(context, device, queue_props, &cl_err);
      CL_CHECK_ERROR(cl_err, "CommandQueue queue");

      // write
      // Blocking (CL_TRUE) uploads of the two input arrays.
      CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, a_buffer, CL_TRUE, 0, sizeof(cl_uchar4) * problem_size, a_array.data(), 0, NULL, NULL));

      CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, b_buffer, CL_TRUE, 0, sizeof(cl_float) * gaussian._filter_total_size, b_array.data(), 0, NULL, NULL));

      // One-dimensional launch: offset 0, problem_size work-items, CL_LWS per group.
      const size_t gwo = {0};
      const size_t gws = {problem_size};
      const size_t lws = {CL_LWS};
      CL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0, NULL, NULL));

      // read
      // c_buffer is bound as kernel argument 0 and, judging by the argument order,
      // holds the kernel's output: it has to be read back into c_array. Enqueuing a
      // write here would instead overwrite the result with the host array's
      // contents.
      CL_CHECK_ERROR(clEnqueueReadBuffer(queue, c_buffer, CL_TRUE, 0, sizeof(cl_uchar4) * problem_size, c_array.data(), 0, NULL, NULL));

      CL_CHECK_ERROR(clFinish(queue));

      // Release every object created above.
      CL_CHECK_ERROR(clReleaseCommandQueue(queue));
      CL_CHECK_ERROR(clReleaseKernel(kernel));
      CL_CHECK_ERROR(clReleaseProgram(program));
      CL_CHECK_ERROR(clReleaseMemObject(c_buffer));
      CL_CHECK_ERROR(clReleaseMemObject(b_buffer));
      CL_CHECK_ERROR(clReleaseMemObject(a_buffer));
      CL_CHECK_ERROR(clReleaseContext(context));
      // NOTE(review): `device` was obtained from clGetDeviceIDs and never retained;
      // per the OpenCL 1.2 specification this call leaves root-level devices
      // unchanged - confirm it is intended.
      CL_CHECK_ERROR(clReleaseDevice(device));

    With regard to memory leaks, of course. I have not pasted all the surrounding code, just the OpenCL calls.

  4. #4
    Senior Member
    Join Date
    Apr 2015
    Posts
    321
    AMD CodeXL reports missing object releases, if that would put your mind at ease. I have never tried it under Linux, though.

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean