Actions
Bug #19110
Closed: segv in OpTracker::dump_ops_in_flight
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Thread 35 (Thread 0x7feca66b5700 (LWP 10660)): #0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135 #1 0x00007fecc27df649 in _L_lock_909 () from /lib/x86_64-linux-gnu/libpthread.so.0 ---Type <return> to continue, or q <return> to quit--- #2 0x00007fecc27df470 in __GI___pthread_mutex_lock (mutex=0x7feccf5db820) at ../nptl/pthread_mutex_lock.c:79 #3 0x00007fecc4a15978 in Mutex::Lock (this=this@entry=0x7feccf5db810, no_lockdep=no_lockdep@entry=false) at /build/ceph-12.0.0-839-g3f77257/src/common/Mutex.cc:110 #4 0x00007fecc475d506 in Locker (m=..., this=<synthetic pointer>) at /build/ceph-12.0.0-839-g3f77257/src/common/Mutex.h:115 #5 OpTracker::unregister_inflight_op (this=0x7feccf8fd008, i=0x7fecdb494b40) at /build/ceph-12.0.0-839-g3f77257/src/common/TrackedOp.cc:191 #6 0x00007fecc45a1fa6 in intrusive_ptr_release (o=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/src/common/TrackedOp.h:293 #7 ~intrusive_ptr (this=0x7feca66b3850, __in_chrg=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/smart_ptr/intrusive_ptr.hpp:97 #8 internal_visit<boost::intrusive_ptr<OpRequest> > (this=<optimized out>, operand=...) 
at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/variant.hpp:385 #9 visitation_impl_invoke_impl<boost::detail::variant::destroyer, void*, boost::intrusive_ptr<OpRequest> > (visitor=..., storage=0x7feca66b3850) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/detail/visitation_impl.hpp:114 #10 visitation_impl_invoke<boost::detail::variant::destroyer, void*, boost::intrusive_ptr<OpRequest>, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (t=0x0, storage=0x7feca66b3850, visitor=<synthetic pointer>, internal_which=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/detail/visitation_impl.hpp:157 #11 visitation_impl<mpl_::int_<0>, boost::detail::variant::visitation_impl_step<boost::mpl::l_iter<boost::mpl::l_item<mpl_::long_<4l>, boost::intrusive_ptr<OpRequest>, boost::mpl::l_item<mpl_::long_<3l>, PGSnapTrim, boost::mpl::l_item<mpl_::long_<2l>, PGScrub, boost::mpl::l_item<mpl_::long_<1l>, PGRecovery, boost::mpl::l_end> > > > >, boost::mpl::l_iter<boost::mpl::l_end> >, boost::detail::variant::destroyer, void*, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (no_backup_flag=..., storage=0x7feca66b3850, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/detail/visitation_impl.hpp:238 #12 internal_apply_visitor_impl<boost::detail::variant::destroyer, void*> ( storage=0x7feca66b3850, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/variant.hpp:2389 #13 internal_apply_visitor<boost::detail::variant::destroyer> ( visitor=<synthetic pointer>, 
this=0x7feca66b3848) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/variant.hpp:2400 #14 destroy_content (this=0x7feca66b3848) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/variant.hpp:1412 #15 ~variant (this=0x7feca66b3848, __in_chrg=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/obj-x86_64-linux-gnu/boost/include/boost/variant/variant.hpp:1419 #16 ~PGQueueable (this=0x7feca66b3848, __in_chrg=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/src/osd/OSD.h:363 #17 std::pair<boost::intrusive_ptr<PG>, PGQueueable>::~pair ( this=0x7feca66b3840, __in_chrg=<optimized out>) at /usr/include/c++/4.8/bits/stl_pair.h:96 #18 0x00007fecc455ec92 in OSD::ShardedOpWQ::_process (this=<optimized out>, thread_index=<optimized out>, hb=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/src/osd/OSD.cc:9047 ---Type <return> to continue, or q <return> to quit--- #19 0x00007fecc4a54735 in ShardedThreadPool::shardedthreadpool_worker ( this=0x7feccf8fc950, thread_index=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/src/common/WorkQueue.cc:356 #20 0x00007fecc4a56890 in ShardedThreadPool::WorkThreadSharded::entry ( this=<optimized out>) at /build/ceph-12.0.0-839-g3f77257/src/common/WorkQueue.h:685 #21 0x00007fecc27dd184 in start_thread (arg=0x7feca66b5700) at pthread_create.c:312 #22 0x00007fecc18cd37d in clone ()
vs
Thread 1 (Thread 0x7fecbd5b1700 (LWP 10587)): #0 0x00007fecc27e51fb in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:37 #1 0x00007fecc49f8d75 in reraise_fatal (signum=6) at /build/ceph-12.0.0-839-g3f77257/src/global/signal_handler.cc:74 #2 handle_fatal_signal (signum=6) at /build/ceph-12.0.0-839-g3f77257/src/global/signal_handler.cc:138 #3 <signal handler called> #4 0x00007fecc1809c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 #5 0x00007fecc180d028 in __GI_abort () at abort.c:89 #6 0x00007fecc2114535 in __gnu_cxx::__verbose_terminate_handler() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #7 0x00007fecc21126d6 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #8 0x00007fecc2112703 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #9 0x00007fecc2112922 in __cxa_throw () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #10 0x00007fecc21641c7 in std::__throw_logic_error(char const*) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #11 0x00007fecc216f9f1 in char* std::string::_S_construct<char const*>(char const*, char const*, std::allocator<char> const&, std::forward_iterator_tag) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #12 0x00007fecc216fd88 in std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 #13 0x00007fecc4757ae7 in TrackedOp::dump (this=this@entry=0x7fecdb494b40, now=..., now@entry=..., f=f@entry=0x7fecd07f7d40) at /build/ceph-12.0.0-839-g3f77257/src/common/TrackedOp.cc:363 #14 0x00007fecc4758327 in OpTracker::dump_ops_in_flight ( this=this@entry=0x7feccf8fd008, f=f@entry=0x7fecd07f7d40, print_only_blocked=print_only_blocked@entry=false) at /build/ceph-12.0.0-839-g3f77257/src/common/TrackedOp.cc:149 #15 0x00007fecc4562d73 in OSD::asok_command (this=0x7feccf8fc000, Python Exception <class 'IndexError'> 
list index out of range: command="dump_ops_in_flight", cmdmap=std::map with 1 elements, format="json-pretty", ss=...) at /build/ceph-12.0.0-839-g3f77257/src/osd/OSD.cc:1828 #16 0x00007fecc45a83e8 in OSDSocketHook::call (this=this@entry=0x7feccf5a64d0, Python Exception <class 'IndexError'> list index out of range: command="dump_ops_in_flight", cmdmap=std::map with 1 elements, format="json-pretty", out=...) at /build/ceph-12.0.0-839-g3f77257/src/osd/OSD.cc:1801 #17 0x00007fecc4a1c403 in AdminSocket::do_accept ( this=this@entry=0x7feccf728000) at /build/ceph-12.0.0-839-g3f77257/src/common/admin_socket.cc:415 #18 0x00007fecc4a1db18 in AdminSocket::entry (this=0x7feccf728000) at /build/ceph-12.0.0-839-g3f77257/src/common/admin_socket.cc:280 #19 0x00007fecc27dd184 in start_thread (arg=0x7fecbd5b1700) at pthread_create.c:312 #20 0x00007fecc18cd37d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
Updated by Sage Weil about 7 years ago
- Status changed from New to 12
- Assignee set to Sage Weil
- Priority changed from Normal to Immediate
Actually, I don't think this is a race. I think it's just unsafe clearing of desc:
(gdb) p desc
$9 = 0x0
(gdb) p desc_str
$10 = "osd_op(osd.4.5:765 2.2 2.930f98a (undecoded) ondisk+read+ignore_cache+ignore_overlay+flush+map_snap_clone+known_if_redirected e94)"
from this code
  const char *get_desc() const {
    if (!desc) {
      Mutex::Locker l(lock);
      _gen_desc();
    }
    return desc;
  }
private:
  void _gen_desc() const {
    ostringstream ss;
    _dump_op_descriptor_unlocked(ss);
    desc_str = ss.str();
    desc = desc_str.c_str();
  }
public:
  void reset_desc() {
    desc = nullptr;
  }
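which has clearly unsafe locking! get_desc() tests and returns desc outside the lock, and reset_desc() clears it without taking the lock at all, so a concurrent reset_desc() can make get_desc() return nullptr. TrackedOp::dump then constructs a std::string from that null pointer, which throws std::logic_error and aborts (Thread 1, frames #8-#13).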
Updated by Sage Weil about 7 years ago
/a/sage-2017-02-27_21:39:28-rados-wip-sage-testing---basic-smithi/866800
Updated by Sage Weil about 7 years ago
- Status changed from 12 to Fix Under Review
Updated by Nathan Cutler about 7 years ago
Updated by Sage Weil about 7 years ago
- Status changed from Fix Under Review to Resolved
Actions