Actions
Bug #1073
closedosd: failed assert: info.last_complete == info.last_update
Source:
Tags:
Backport:
Regression:
Severity:
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
I got this crash on several osds:
osd/PG.cc: In function 'void PG::finish_recovery(ObjectStore::Transaction&, std::list<Context*, std::allocator<Context*> >&)', in thread '0x7fce5ac15710' osd/PG.cc: 1721: FAILED assert(info.last_complete == info.last_update) ceph version 0.27.1-307-g85292b3 (commit:85292b367b0e6e6d8963de32ad198482500c887f) 1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x58) [0x955fc0] 2: (PG::finish_recovery(ObjectStore::Transaction&, std::list<Context*, std::allocator<Context*> >&)+0xde) [0x83a070] 3: (PG::RecoveryState::Active::react(PG::RecoveryState::ActMap const&)+0x4f5) [0x84c08d] 4: (boost::statechart::detail::reaction_result boost::statechart::custom_reaction<PG::RecoveryState::ActMap>::react<PG::RecoveryState::Active, boost::statechart::event_base, void const*>(PG::Recov eryState::Active&, boost::statechart::event_base const&, void const* const&)+0x52) [0x886e6f] 5: (boost::statechart::detail::reaction_result boost::statechart::simple_state<PG::RecoveryState::Active, PG::RecoveryState::Primary, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_: :na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::loca l_react_impl_non_empty::local_react_impl<boost::mpl::list<boost::statechart::custom_reaction<PG::RecoveryState::ActMap>, boost::statechart::custom_reaction<PG::RecoveryState::AdvMap>, boost::statec hart::custom_reaction<PG::RecoveryState::MInfoRec>, boost::statechart::custom_reaction<PG::RecoveryState::MNotifyRec>, boost::statechart::custom_reaction<PG::RecoveryState::MLogRec>, mpl_::na, mpl_ ::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, boost::statechart::simple_state<PG::RecoveryState::Active, P G::RecoveryState::Primary, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl _::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0> >(boost::statechart::simple_state<PG::RecoveryState::Active, PG::RecoveryState::Primary, boost::mpl::list<mpl_::n a, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (bo ost::statechart::history_mode)0>&, boost::statechart::event_base const&, void const*)+0x38) [0x885424] 6: (boost::statechart::detail::reaction_result boost::statechart::simple_state<PG::RecoveryState::Active, PG::RecoveryState::Primary, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_: :na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::loca l_react<boost::mpl::list<boost::statechart::custom_reaction<PG::RecoveryState::ActMap>, boost::statechart::custom_reaction<PG::RecoveryState::AdvMap>, boost::statechart::custom_reaction<PG::Recover yState::MInfoRec>, boost::statechart::custom_reaction<PG::RecoveryState::MNotifyRec>, boost::statechart::custom_reaction<PG::RecoveryState::MLogRec>, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::n a, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na> >(boost::statechart::event_base const&, void const*)+0x30) [0x8834fe] 7: (boost::statechart::simple_state<PG::RecoveryState::Active, PG::RecoveryState::Primary, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na , mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const& , void const*)+0x31) [0x880c89] 8: (boost::statechart::detail::send_function<boost::statechart::detail::state_base<std::allocator<void>, boost::statechart::detail::rtti_policy>, boost::statechart::event_base, void const*>::opera tor()()+0x4b) [0x817d4b] 9: (boost::statechart::detail::safe_reaction_result boost::statechart::null_exception_translator::operator()<boost::statechart::detail::send_function<boost::statechart::detail::state_base<std::allocator<void>, boost::statechart::detail::rtti_policy>, boost::statechart::event_base, void const*>, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::exception_event_handler>(boost::statechart::detail::send_function<boost::statechart::detail::state_base<std::allocator<void>, boost::statechart::detail::rtti_policy>, boost::statechart::event_base, void const*>, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::exception_event_handler)+0x2f) [0x808805] 10: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::send_event(boost::statechart::event_base const&)+0x149) [0x7f3147] 11: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x28) [0x860084] 12: (PG::RecoveryState::handle_activate_map(PG::RecoveryCtx*)+0x124) [0x852baa] 13: (PG::handle_activate_map(PG::RecoveryCtx*)+0x2f) [0x7c6a93] 14: (OSD::activate_map(ObjectStore::Transaction&, std::list<Context*, std::allocator<Context*> >&)+0x2ea) [0x7ad9bc] 15: (OSD::handle_osd_map(MOSDMap*)+0x1917) [0x7ab7ad] 16: (OSD::_dispatch(Message*)+0x358) [0x7a817c] 17: (OSD::ms_dispatch(Message*)+0x125) [0x7a74e5] 18: (Messenger::ms_deliver_dispatch(Message*)+0x68) [0x6fad5e] 19: (SimpleMessenger::dispatch_entry()+0x702) [0x6e8c0e] 20: (SimpleMessenger::DispatchThread::entry()+0x31) [0x6dddd1] 21: (Thread::_entry_func(void*)+0x28) [0x6f9af5] 22: (()+0x68ba) [0x7fce6527a8ba] 23: (clone()+0x6d) [0x7fce63f0f02d]
To reproduce:
Run 20 osds on one machine, and a rados write benchmark:
CEPH_NUM_OSD=20 CEPH_NUM_MDS=1 ./vstart.sh -d -n ./rados bench 10000 -p data write
Mark some osds out, with delays in between (I did 0-4).
Mark osd.0 in. Wait for it to peer and recover.
Logs, cores, and executables are in vit:/home/joshd/crashes/finish_recovery.
Actions