Actions
Bug #20808
closedosd deadlock: forced recovery
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Thread 3 (Thread 0x7fe716733700 (LWP 506178)): #0 0x00007fe73e83d1bd in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007fe73e838d02 in _L_lock_791 () from /lib64/libpthread.so.0 #2 0x00007fe73e838c08 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007fe741a772e8 in Mutex::Lock (this=this@entry=0x7fe74cd22de8, no_lockdep=no_lockdep@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.cc:107 #4 0x00007fe7415237c1 in Locker (m=..., this=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.h:115 #5 OSDService::finish_recovery_op (this=0x7fe74cd21860, pg=pg@entry=0x7fe74f57e000, soid=..., dequeue=dequeue@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:9404 #6 0x00007fe7415a9ea8 in PG::finish_recovery_op (this=this@entry=0x7fe74f57e000, soid=..., dequeue=dequeue@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PG.cc:2187 #7 0x00007fe74168704a in PrimaryLogPG::on_global_recover (this=0x7fe74f57e000, soid=..., stat_diff=..., is_delete=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PrimaryLogPG.cc:484 #8 0x00007fe7417e7661 in ReplicatedBackend::handle_push_reply (this=this@entry=0x7fe74cf13d80, peer=..., op=..., reply=0x7fe7503fb200) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/ReplicatedBackend.cc:2147 #9 0x00007fe7417ee631 in ReplicatedBackend::do_push_reply (this=this@entry=0x7fe74cf13d80, op=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/ReplicatedBackend.cc:954 #10 0x00007fe7417eea44 in ReplicatedBackend::_handle_message (this=0x7fe74cf13d80, op=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/ReplicatedBackend.cc:210 #11 0x00007fe741719000 in PGBackend::handle_message (this=<optimized out>, op=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PGBackend.cc:114 #12 0x00007fe741685bce in PrimaryLogPG::do_request (this=0x7fe74f57e000, op=..., handle=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PrimaryLogPG.cc:1737 #13 0x00007fe74151a099 in OSD::dequeue_op (this=0x7fe74cd20000, pg=..., op=..., handle=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:9494 #14 0x00007fe741786dc7 in PGQueueable::RunVis::operator() (this=this@entry=0x7fe716731030, op=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PGQueueable.cc:22 #15 0x00007fe74154533e in internal_visit<boost::intrusive_ptr<OpRequest> > (operand=..., this=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:1046 #16 visitation_impl_invoke_impl<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, boost::intrusive_ptr<OpRequest> > (storage=0x7fe716731080, visitor=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:114 #17 visitation_impl_invoke<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, boost::intrusive_ptr<OpRequest>, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (t=0x0, storage=0x7fe716731080, visitor=<synthetic pointer>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:157 #18 visitation_impl<mpl_::int_<0>, boost::detail::variant::visitation_impl_step<boost::mpl::l_iter<boost::mpl::l_item<mpl_::long_<4l>, boost::intrusive_ptr<OpRequest>, boost::mpl::l_item<mpl_::long_<3l>, PGSnapTrim, boost::mpl::l_item<mpl_::long_<2l>, PGScrub, boost::mpl::l_item<mpl_::long_<1l>, PGRecovery, boost::mpl::l_end> > > > >, boost::mpl::l_iter<boost::mpl::l_end> >, boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (no_backup_flag=..., storage=0x7fe716731080, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:238 #19 internal_apply_visitor_impl<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*> (storage=0x7fe716731080, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2389 #20 internal_apply_visitor<boost::detail::variant::invoke_visitor<PGQueueable::RunVis> > (visitor=<synthetic pointer>, this=0x7fe716731078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2400 ---Type <return> to continue, or q <return> to quit--- #21 apply_visitor<PGQueueable::RunVis> (visitor=..., this=0x7fe716731078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2423 #22 apply_visitor<PGQueueable::RunVis, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery> > (visitable=..., visitor=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/apply_visitor_unary.hpp:70 #23 run (handle=..., pg=..., osd=<optimized out>, this=0x7fe716731078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PGQueueable.h:140 #24 OSD::ShardedOpWQ::_process (this=0x7fe74cd21240, thread_index=<optimized out>, hb=0x7fe74d348270) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:10277 #25 0x00007fe741a9cff9 in ShardedThreadPool::shardedthreadpool_worker (this=0x7fe74cd20a30, thread_index=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.cc:339 #26 0x00007fe741a9ef90 in ShardedThreadPool::WorkThreadSharded::entry (this=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.h:689 #27 0x00007fe73e836dc5 in start_thread () from /lib64/libpthread.so.0 #28 0x00007fe73d92a73d in clone () from /lib64/libc.so.6 Thread 87 (Thread 0x7fe727755700 (LWP 506144)): #0 0x00007fe73e83d1bd in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007fe73e838d02 in _L_lock_791 () from /lib64/libpthread.so.0 #2 0x00007fe73e838c08 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007fe741a772e8 in Mutex::Lock (this=this@entry=0x7fe74d0b4510, no_lockdep=no_lockdep@entry=true) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.cc:107 #4 0x00007fe7415a4783 in PG::lock (this=this@entry=0x7fe74d0b4000, no_lockdep=no_lockdep@entry=true) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PG.cc:363 #5 0x00007fe7415c0606 in PG::change_recovery_force_mode (this=0x7fe74d0b4000, new_mode=new_mode@entry=1073741824, clear=clear@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PG.cc:2046 #6 0x00007fe7414f6316 in OSDService::adjust_pg_priorities (this=this@entry=0x7fe74cd21860, pgs=std::vector of length 3, capacity 3 = {...}, newflags=newflags@entry=1) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:9269 #7 0x00007fe741512514 in OSD::handle_force_recovery (this=this@entry=0x7fe74cd20000, m=m@entry=0x7fe74d3e1b00) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:8971 #8 0x00007fe74153d3eb in OSD::_dispatch (this=this@entry=0x7fe74cd20000, m=m@entry=0x7fe74d3e1b00) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:7139 #9 0x00007fe74153d787 in OSD::ms_dispatch (this=0x7fe74cd20000, m=0x7fe74d3e1b00) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:6812 #10 0x00007fe741cb7472 in ms_deliver_dispatch (m=0x7fe74d3e1b00, this=0x7fe74cc20000) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/msg/Messenger.h:625 #11 DispatchQueue::entry (this=0x7fe74cc20180) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/msg/DispatchQueue.cc:197 #12 0x00007fe741b2258d in DispatchQueue::DispatchThread::entry (this=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/msg/DispatchQueue.h:101 #13 0x00007fe73e836dc5 in start_thread () from /lib64/libpthread.so.0 #14 0x00007fe73d92a73d in clone () from /lib64/libc.so.6 Thread 122 (Thread 0x7fe73178e700 (LWP 506084)): #0 0x00007fe73e83d1bd in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007fe73e838d02 in _L_lock_791 () from /lib64/libpthread.so.0 #2 0x00007fe73e838c08 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007fe741a772e8 in Mutex::Lock (this=this@entry=0x7fe74cd22de8, no_lockdep=no_lockdep@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.cc:107 #4 0x00007fe7414f67fa in Locker (m=..., this=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.h:115 #5 OSDService::is_recovery_active (this=this@entry=0x7fe74cd21860) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:9425 #6 0x00007fe741510d3a in OSD::sched_scrub (this=this@entry=0x7fe74cd20000) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:7349 #7 0x00007fe741524238 in OSD::tick_without_osd_lock (this=0x7fe74cd20000) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:5168 #8 0x00007fe741554949 in Context::complete (this=0x7fe74c900f50, r=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/include/Context.h:70 #9 0x00007fe741a92764 in SafeTimer::timer_thread (this=0x7fe74cd20190) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Timer.cc:97 #10 0x00007fe741a9418d in SafeTimerThread::entry (this=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Timer.cc:30 #11 0x00007fe73e836dc5 in start_thread () from /lib64/libpthread.so.0 #12 0x00007fe73d92a73d in clone () from /lib64/libc.so.6 Thread 105 (Thread 0x7fe71af3c700 (LWP 506169)): #0 0x00007fe73e83d1bd in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007fe73e838d02 in _L_lock_791 () from /lib64/libpthread.so.0 #2 0x00007fe73e838c08 in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00007fe741a772e8 in Mutex::Lock (this=this@entry=0x7fe74d0b4510, no_lockdep=no_lockdep@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.cc:107 #4 0x00007fe7415a4783 in PG::lock (this=0x7fe74d0b4000, no_lockdep=no_lockdep@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PG.cc:363 #5 0x00007fe741544af1 in OSD::ShardedOpWQ::_process (this=0x7fe74cd21240, thread_index=<optimized out>, hb=0x7fe74d3480c0) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:10154 #6 0x00007fe741a9cff9 in ShardedThreadPool::shardedthreadpool_worker (this=0x7fe74cd20a30, thread_index=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.cc:339 #7 0x00007fe741a9ef90 in ShardedThreadPool::WorkThreadSharded::entry (this=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.h:689 #8 0x00007fe73e836dc5 in start_thread () from /lib64/libpthread.so.0 #9 0x00007fe73d92a73d in clone () from /lib64/libc.so.6 Thread 1 (Thread 0x7fe716f34700 (LWP 506177)): #0 0x00007fe73e83e23b in raise () from /lib64/libpthread.so.0 #1 0x00007fe741a58ec6 in reraise_fatal (signum=6) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/global/signal_handler.cc:74 #2 handle_fatal_signal (signum=6) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/global/signal_handler.cc:138 #3 <signal handler called> #4 0x00007fe73e83d1bd in __lll_lock_wait () from /lib64/libpthread.so.0 #5 0x00007fe73e838d02 in _L_lock_791 () from /lib64/libpthread.so.0 #6 0x00007fe73e838c08 in pthread_mutex_lock () from /lib64/libpthread.so.0 #7 0x00007fe741a772e8 in Mutex::Lock (this=this@entry=0x7fe74cd22de8, no_lockdep=no_lockdep@entry=false) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.cc:107 #8 0x00007fe7415249ec in Locker (m=..., this=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/Mutex.h:115 #9 release_reserved_pushes (pushes=1, this=0x7fe74cd21860) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.h:910 #10 OSD::do_recovery (this=0x7fe74cd20000, pg=<optimized out>, queued=<optimized out>, reserved_pushes=1, handle=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:9383 #11 0x00007fe741786f09 in PGQueueable::RunVis::operator() (this=this@entry=0x7fe716f32030, op=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PGQueueable.cc:34 #12 0x00007fe741545249 in internal_visit<PGRecovery> (operand=..., this=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:1046 #13 visitation_impl_invoke_impl<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, PGRecovery> (storage=0x7fe716f32080, visitor=<synthetic pointer>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:114 #14 visitation_impl_invoke<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, PGRecovery, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (t=0x0, storage=0x7fe716f32080, visitor=<synthetic pointer>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:157 #15 visitation_impl<mpl_::int_<0>, boost::detail::variant::visitation_impl_step<boost::mpl::l_iter<boost::mpl::l_item<mpl_::long_<4l>, boost::intrusive_ptr<OpRequest>, boost::mpl::l_item<mpl_::long_<3l>, PGSnapTrim, boost::mpl::l_item<mpl_::long_<2l>, PGScrub, boost::mpl::l_item<mpl_::long_<1l>, PGRecovery, boost::mpl::l_end> > > > >, boost::mpl::l_iter<boost::mpl::l_end> >, boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery>::has_fallback_type_> (no_backup_flag=..., storage=0x7fe716f32080, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/visitation_impl.hpp:238 #16 internal_apply_visitor_impl<boost::detail::variant::invoke_visitor<PGQueueable::RunVis>, void*> (storage=0x7fe716f32080, visitor=<synthetic pointer>, logical_which=<optimized out>, internal_which=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2389 #17 internal_apply_visitor<boost::detail::variant::invoke_visitor<PGQueueable::RunVis> > (visitor=<synthetic pointer>, this=0x7fe716f32078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2400 #18 apply_visitor<PGQueueable::RunVis> (visitor=..., this=0x7fe716f32078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/variant.hpp:2423 #19 apply_visitor<PGQueueable::RunVis, boost::variant<boost::intrusive_ptr<OpRequest>, PGSnapTrim, PGScrub, PGRecovery> > (visitable=..., visitor=...) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/build/boost/include/boost/variant/detail/apply_visitor_unary.hpp:70 #20 run (handle=..., pg=..., osd=<optimized out>, this=0x7fe716f32078) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/PGQueueable.h:140 #21 OSD::ShardedOpWQ::_process (this=0x7fe74cd21240, thread_index=<optimized out>, hb=0x7fe74d348210) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/osd/OSD.cc:10277 #22 0x00007fe741a9cff9 in ShardedThreadPool::shardedthreadpool_worker (this=0x7fe74cd20a30, thread_index=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.cc:339 #23 0x00007fe741a9ef90 in ShardedThreadPool::WorkThreadSharded::entry (this=<optimized out>) at /usr/src/debug/ceph-12.1.1-763-g93d27e3/src/common/WorkQueue.h:689 #24 0x00007fe73e836dc5 in start_thread () from /lib64/libpthread.so.0 #25 0x00007fe73d92a73d in clone () from /lib64/libc.so.6
/a/sage-2017-07-27_18:08:03-rados-wip-sage-testing-distro-basic-smithi/1453395
Actions