Actions
Bug #64637
Leak_PossiblyLost in BlueStore::_do_write_small() in osd
Status:
New
Priority:
Normal
Assignee:
-
Category:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
reef
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
/a/yuriw-2024-02-22_21:33:08-rados-wip-yuri8-testing-2024-02-22-0734-reef-distro-default-smithi/7571350
<fatal_signal>
<tid>72</tid>
<threadname>tp_osd_tp</threadname>
<signo>6</signo>
<signame>SIGABRT</signame>
<sicode>-6</sicode>
<stack>
<frame>
<ip>0x54EC54C</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>__pthread_kill_implementation</fn>
</frame>
<frame>
<ip>0x549FD05</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>raise</fn>
</frame>
<frame>
<ip>0x5473884</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>abort</fn>
</frame>
<frame>
<ip>0x560F48</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>assert.cc</file>
<line>198</line>
</frame>
<frame>
<ip>0xB9B309</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ceph::HeartbeatMap::_check(ceph::heartbeat_handle_d const*, char const*, std::chrono::time_point<ceph::coarse_mono_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>HeartbeatMap.cc</file>
<line>85</line>
</frame>
<frame>
<ip>0xB9B3CF</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ceph::HeartbeatMap::reset_timeout(ceph::heartbeat_handle_d*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>HeartbeatMap.cc</file>
<line>97</line>
</frame>
<frame>
<ip>0x6D8D36</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>WorkQueue.cc</file>
<line>52</line>
</frame>
<frame>
<ip>0x6D8D36</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>OSD::advance_pg(unsigned int, PG*, ThreadPool::TPHandle&, PeeringCtx&)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd</dir>
<file>OSD.cc</file>
<line>8934</line>
</frame>
<frame>
<ip>0x6E44C6</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>OSD::dequeue_peering_evt(OSDShard*, PG*, std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd</dir>
<file>OSD.cc</file>
<line>9778</line>
</frame>
<frame>
<ip>0x8D9560</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ceph::osd::scheduler::PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd/scheduler</dir>
<file>OpSchedulerItem.cc</file>
<line>43</line>
</frame>
<frame>
<ip>0x6F98B6</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd/scheduler</dir>
<file>OpSchedulerItem.h</file>
<line>139</line>
</frame>
<frame>
<ip>0x6F98B6</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd</dir>
<file>OSD.cc</file>
<line>11189</line>
</frame>
<frame>
<ip>0xBAB9AA</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ShardedThreadPool::shardedthreadpool_worker(unsigned int)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>WorkQueue.cc</file>
<line>313</line>
</frame>
<frame>
<ip>0xBABF13</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ShardedThreadPool::WorkThreadSharded::entry()</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/common</dir>
<file>WorkQueue.h</file>
<line>643</line>
</frame>
<frame>
<ip>0x54EA801</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>start_thread</fn>
</frame>
<frame>
<ip>0x548A313</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>clone</fn>
</frame>
</stack>
</fatal_signal>
<status>
<state>FINISHED</state>
<time>00:00:10:36.877 </time>
</status>
<error>
<unique>0x20212</unique>
<tid>1</tid>
<kind>Leak_PossiblyLost</kind>
<xwhat>
<text>16 bytes in 1 blocks are possibly lost in loss record 339 of 6,358</text>
<leakedbytes>16</leakedbytes>
<leakedblocks>1</leakedblocks>
</xwhat>
<stack>
<frame>
<ip>0x48462F3</ip>
<obj>/usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so</obj>
<fn>operator new[](unsigned long)</fn>
<dir>/builddir/build/BUILD/valgrind-3.21.0/coregrind/m_replacemalloc</dir>
<file>vg_replace_malloc.c</file>
<line>714</line>
</frame>
<frame>
<ip>0xAFB86B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/include/c++/11/bits</dir>
<file>alloc_traits.h</file>
<line>318</line>
</frame>
<frame>
<ip>0xAFB86B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/include/c++/11/bits</dir>
<file>stl_vector.h</file>
<line>346</line>
</frame>
<frame>
<ip>0xAFB86B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>void std::vector<bluefs_extent_t, mempool::pool_allocator<(mempool::pool_index_t)15, bluefs_extent_t> >::_M_realloc_insert<bluefs_extent_t>(__gnu_cxx::__normal_iterator<bluefs_extent_t*, std::vector<bluefs_extent_t, mempool::pool_allocator<(mempool::pool_index_t)15, bluefs_extent_t> > >, bluefs_extent_t&&)</fn>
<dir>/usr/include/c++/11/bits</dir>
<file>vector.tcc</file>
<line>440</line>
</frame>
<frame>
<ip>0xAFB912</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>bluestore_pextent_t& std::vector<bluestore_pextent_t, mempool::pool_allocator<(mempool::pool_index_t)5, bluestore_pextent_t> >::emplace_back<bluestore_pextent_t>(bluestore_pextent_t&&)</fn>
<dir>/usr/include/c++/11/bits</dir>
<file>vector.tcc</file>
<line>121</line>
</frame>
<frame>
<ip>0xADB63B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>14965</line>
</frame>
<frame>
<ip>0xADB63B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>bluestore_types.h</file>
<line>703</line>
</frame>
<frame>
<ip>0xADB63B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>bluestore_types.h</file>
<line>744</line>
</frame>
<frame>
<ip>0xADB63B</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::_do_write_small(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v15_2_0::list::iterator&, BlueStore::WriteContext*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>14962</line>
</frame>
<frame>
<ip>0xADBCD4</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::_do_write_data(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v15_2_0::list&, BlueStore::WriteContext*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>15969</line>
</frame>
<frame>
<ip>0xADFCC2</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v15_2_0::list&, unsigned int)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>16173</line>
</frame>
<frame>
<ip>0xAE08F7</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v15_2_0::list&, unsigned int)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>16244</line>
</frame>
<frame>
<ip>0xAD2196</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::_txc_add_transaction(BlueStore::TransContext*, ceph::os::Transaction*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>14429</line>
</frame>
<frame>
<ip>0xAD31D2</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os/bluestore</dir>
<file>BlueStore.cc</file>
<line>14174</line>
</frame>
<frame>
<ip>0x66F3F7</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>ObjectStore::queue_transaction(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, ceph::os::Transaction&&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/os</dir>
<file>ObjectStore.h</file>
<line>229</line>
</frame>
<frame>
<ip>0x6D20F7</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>OSD::handle_osd_map(MOSDMap*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd</dir>
<file>OSD.cc</file>
<line>8355</line>
</frame>
<frame>
<ip>0x6C72E7</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>OSD::ms_dispatch(Message*)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/osd</dir>
<file>OSD.cc</file>
<line>7367</line>
</frame>
<frame>
<ip>0x673E0A</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>Dispatcher::ms_dispatch2(boost::intrusive_ptr<Message> const&)</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/msg</dir>
<file>Dispatcher.h</file>
<line>124</line>
</frame>
<frame>
<ip>0xE409E1</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>UnknownInlinedFun</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/msg</dir>
<file>Messenger.h</file>
<line>715</line>
</frame>
<frame>
<ip>0xE409E1</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>DispatchQueue::entry()</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/msg</dir>
<file>DispatchQueue.cc</file>
<line>201</line>
</frame>
<frame>
<ip>0xCEEC80</ip>
<obj>/usr/bin/ceph-osd</obj>
<fn>DispatchQueue::DispatchThread::entry()</fn>
<dir>/usr/src/debug/ceph-18.2.1-638.g055f1d8d.el9.x86_64/src/msg</dir>
<file>DispatchQueue.h</file>
<line>101</line>
</frame>
<frame>
<ip>0x54EA801</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>start_thread</fn>
</frame>
<frame>
<ip>0x548A313</ip>
<obj>/usr/lib64/libc.so.6</obj>
<fn>clone</fn>
</frame>
</stack>
</error>
Also in this failure were monitor leaks as tracked in https://tracker.ceph.com/issues/61774.
/a/yuriw-2024-02-22_21:33:08-rados-wip-yuri8-testing-2024-02-22-0734-reef-distro-default-smithi/7571350$ cat teuthology.log | grep "Exit "
2024-02-23T03:47:07.826 INFO:tasks.ceph.osd.6.smithi118.stderr:==00:00:10:38.516 34930== Exit program on first error (--exit-on-first-error=yes)
2024-02-23T03:52:20.384 INFO:tasks.ceph.mon.c.smithi033.stderr:==00:00:16:09.941 34675== Exit program on first error (--exit-on-first-error=yes)
2024-02-23T03:52:20.414 INFO:tasks.ceph.mon.b.smithi118.stderr:==00:00:16:09.951 34309== Exit program on first error (--exit-on-first-error=yes)
2024-02-23T03:52:25.812 INFO:tasks.ceph.mon.a.smithi033.stderr:==00:00:16:15.369 34672== Exit program on first error (--exit-on-first-error=yes)
Updated by Laura Flores 2 months ago
- Related to Bug #61774: centos 9 testing reveals rocksdb "Leak_StillReachable" memory leak in mons added
Updated by Laura Flores 2 months ago
Laura Flores wrote:
/a/yuriw-2024-02-22_21:33:08-rados-wip-yuri8-testing-2024-02-22-0734-reef-distro-default-smithi/7571350
[...] Also in this failure were monitor leaks as tracked in https://tracker.ceph.com/issues/61774.
[...]
This was in the teuthology log, which I missed the first time:
2024-02-23T03:47:05.266 INFO:tasks.ceph.osd.6.smithi118.stderr:*** Caught signal (Aborted) **
2024-02-23T03:47:05.266 INFO:tasks.ceph.osd.6.smithi118.stderr: in thread 2d499640 thread_name:tp_osd_tp
2024-02-23T03:47:05.402 INFO:tasks.ceph.osd.6.smithi118.stderr: ceph version 18.2.1-638-g055f1d8d (055f1d8d2e5cbd7e37bfd1e7bd8c27a3b6e775e2) reef (stable)
2024-02-23T03:47:05.402 INFO:tasks.ceph.osd.6.smithi118.stderr: 1: /lib64/libc.so.6(+0x54db0) [0x549fdb0]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 2: /lib64/libc.so.6(+0xa154c) [0x54ec54c]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 3: (ceph::HeartbeatMap::_check(ceph::heartbeat_handle_d const*, char const*, std::chrono::time_point<ceph::coarse_mono_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >)+0x226) [0xb9b2d6]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 4: (ceph::HeartbeatMap::reset_timeout(ceph::heartbeat_handle_d*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >)+0x70) [0xb9b3d0]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 5: (OSD::advance_pg(unsigned int, PG*, ThreadPool::TPHandle&, PeeringCtx&)+0x867) [0x6d8d37]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 6: (OSD::dequeue_peering_evt(OSDShard*, PG*, std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)+0x237) [0x6e44c7]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 7: (ceph::osd::scheduler::PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x51) [0x8d9561]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 8: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xd67) [0x6f98b7]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 9: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x25b) [0xbab9ab]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 10: ceph-osd(+0xaa3f14) [0xbabf14]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 11: /lib64/libc.so.6(+0x9f802) [0x54ea802]
2024-02-23T03:47:05.403 INFO:tasks.ceph.osd.6.smithi118.stderr: 12: clone()
Updated by Laura Flores 2 months ago
- Related to Bug #62992: Heartbeat crash in reset_timeout and clear_timeout added
Updated by Radoslaw Zarzynski 2 months ago
Looks like typical symptom of (CPU/memory) starvation.
Updated by Nitzan Mordechai about 2 months ago
- Related to Bug #64670: LibRadosAioEC.RoundTrip2 hang and pkill added
Actions