Actions
Bug #9731
closed — Ceph 0.80.6 OSD crashes
% Done:
0%
Source:
Community (user)
Tags:
Backport:
Regression:
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
I received 2 different crashes on 2 different OSDs on different nodes within 30s of each other on 0.80.6. I just upgraded from 0.80.5, where I never saw these kinds of issues.
2014-10-10 12:06:32.502375 7f4bb3a3f700 5 -- op tracker -- , seq: 9606, time: 2014-10-10 12:06:32.502375, event: waiting_for_osdmap, request: osd_sub_op_reply(client.4443832.0:6639 3.29 d5d4c329/rbd_data.440302ae8944a.0000000000001173/head//3 [] ondisk, result = 0) v2 0> 2014-10-10 12:06:32.506607 7f4bae234700 -1 *** Caught signal (Segmentation fault) ** in thread 7f4bae234700 ceph version 0.80.6 (f93610a4421cb670b08e974c6550ee715ac528ae) 1: /usr/bin/ceph-osd() [0xab5882] 2: (()+0xf030) [0x7f4bc613c030] 3: (ceph::buffer::ptr::ptr(ceph::buffer::ptr const&)+0) [0xb94360] 4: /usr/bin/ceph-osd() [0x7372d2] 5: (ceph::buffer::list::list(ceph::buffer::list const&)+0x1e) [0x737e2e] 6: (pg_log_entry_t::pg_log_entry_t(pg_log_entry_t const&)+0xba) [0x7b354a] 7: /usr/bin/ceph-osd() [0x75196f] 8: (PG::PGLogEntryHandler::trim(pg_log_entry_t const&)+0xc) [0x7b35ec] 9: (PGLog::IndexedLog::advance_rollback_info_trimmed_to(eversion_t, PGLog::LogEntryHandler*)+0x83) [0x824d73] 10: (PG::append_log(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&, eversion_t, eversion_t, ObjectStore::Transaction&, bool)+0x833) [0x890bb3] 11: (ReplicatedPG::log_operation(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&, boost::optional<pg_hit_set_history_t>&, eversion_t const&, eversion_t const&, bool, ObjectStore::Transaction*)+0xca) [0x96036a] 12: (ReplicatedBackend::submit_transaction(hobject_t const&, eversion_t const&, PGBackend::PGTransaction*, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&, boost::optional<pg_hit_set_history_t>&, Context*, Context*, Context*, unsigned long, osd_reqid_t, std::tr1::shared_ptr<OpRequest>)+0x634) [0xa38294] 13: (ReplicatedPG::issue_repop(ReplicatedPG::RepGather*, utime_t)+0x33b) [0x8f194b] 14: (ReplicatedPG::execute_ctx(ReplicatedPG::OpContext*)+0xf77) [0x94c757] 15: (ReplicatedPG::do_op(std::tr1::shared_ptr<OpRequest>)+0x2471) [0x94f511] 16: 
(ReplicatedPG::do_request(std::tr1::shared_ptr<OpRequest>, ThreadPool::TPHandle&)+0x52b) [0x8e64ab] 17: (OSD::dequeue_op(boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest>, ThreadPool::TPHandle&)+0x336) [0x7428c6] 18: (OSD::OpWQ::_process(boost::intrusive_ptr<PG>, ThreadPool::TPHandle&)+0x1ea) [0x75f78a] 19: (ThreadPool::WorkQueueVal<std::pair<boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest> >, boost::intrusive_ptr<PG> >::_void_process(void*, ThreadPool::TPHandle&)+0xae) [0x79be4e] 20: (ThreadPool::worker(ThreadPool::WorkThread*)+0x68a) [0xb81b1a] 21: (ThreadPool::WorkThread::entry()+0x10) [0xb82d60] 22: (()+0x6b50) [0x7f4bc6133b50] 23: (clone()+0x6d) [0x7f4bc4d57e6d]
-1> 2014-10-10 12:06:56.493260 7f1d47702700 5 osd.2 pg_epoch: 1752 pg[3.ee( v 1702'5694 (175'2693,1702'5694] local-les=1752 n=117 ec=56 les/c 1750/1750 1751/1751/1751) [2] r=0 lpr=1751 pi=1742-1750/4 crt=1702'5692 lcod 0'0 mlcod 0'0 degraded] enter Started/Primary/Active/Activating 0> 2014-10-10 12:06:56.493182 7f1d47f03700 -1 *** Caught signal (Segmentation fault) ** in thread 7f1d47f03700 ceph version 0.80.6 (f93610a4421cb670b08e974c6550ee715ac528ae) 1: /usr/bin/ceph-osd() [0xab5882] 2: (()+0xf030) [0x7f1d5fd05030] 3: (PG::log_weirdness()+0x1b0) [0x865800] 4: (PG::activate(ObjectStore::Transaction&, unsigned int, std::list<Context*, std::allocator<Context*> >&, std::map<int, std::map<spg_t, pg_query_t, std::less<spg_t>, std::allocator<std::pair<spg_t const, pg_query_t> > >, std::less<int>, std::allocator<std::pair<int const, std::map<spg_t, pg_query_t, std::less<spg_t>, std::allocator<std::pair<spg_t const, pg_query_t> > > > > >&, std::map<int, std::vector<std::pair<pg_notify_t, std::map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > >, std::allocator<std::pair<pg_notify_t, std::map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > > > >, std::less<int>, std::allocator<std::pair<int const, std::vector<std::pair<pg_notify_t, std::map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > >, std::allocator<std::pair<pg_notify_t, std::map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > > > > > > >*, PG::RecoveryCtx*)+0x395) [0x89cf45] 5: (PG::RecoveryState::Active::Active(boost::statechart::state<PG::RecoveryState::Active, PG::RecoveryState::Primary, PG::RecoveryState::Activating, (boost::statechart::history_mode)0>::my_context)+0x52e) [0x8a05ce] 6: 
(boost::statechart::state<PG::RecoveryState::Active, PG::RecoveryState::Primary, PG::RecoveryState::Activating, (boost::statechart::history_mode)0>::shallow_construct(boost::intrusive_ptr<PG::RecoveryState::Primary> const&, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>&)+0x5c) [0x8db1fc] 7: (boost::statechart::state<PG::RecoveryState::Active, PG::RecoveryState::Primary, PG::RecoveryState::Activating, (boost::statechart::history_mode)0>::deep_construct(boost::intrusive_ptr<PG::RecoveryState::Primary> const&, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>&)+0x16) [0x8dc596] 8: (boost::statechart::detail::safe_reaction_result boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>::transit_impl<PG::RecoveryState::Active, PG::RecoveryState::RecoveryMachine, boost::statechart::detail::no_transition_function>(boost::statechart::detail::no_transition_function const&)+0x8d) [0x8dc66d] 9: (boost::statechart::transition<PG::Activate, PG::RecoveryState::Active, boost::statechart::detail::no_context<PG::Activate>, &boost::statechart::detail::no_context<PG::Activate>::no_function>::reactions<PG::RecoveryState::Peering>::react_without_action(PG::RecoveryState::Peering&)+0x12) [0x8dc702] 10: (boost::statechart::detail::reaction_result boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>::local_react_impl_non_empty::local_react_impl<boost::mpl::list2<boost::statechart::transition<PG::Activate, PG::RecoveryState::Active, boost::statechart::detail::no_context<PG::Activate>, &boost::statechart::detail::no_context<PG::Activate>::no_function>, 
boost::statechart::custom_reaction<PG::AdvMap> >, boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0> >(boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>&, boost::statechart::event_base const&, void const*)+0x7b) [0x8dc78b] 11: (boost::statechart::detail::reaction_result boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>::local_react_impl_non_empty::local_react_impl<boost::mpl::list<boost::statechart::custom_reaction<PG::QueryState>, boost::statechart::transition<PG::Activate, PG::RecoveryState::Active, boost::statechart::detail::no_context<PG::Activate>, &boost::statechart::detail::no_context<PG::Activate>::no_function>, boost::statechart::custom_reaction<PG::AdvMap>, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0> >(boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>&, boost::statechart::event_base const&, void const*)+0x57) [0x8dc7f7] 12: (boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x21) [0x8dc871] 13: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_queued_events()+0x137) [0x8bca17] 14: 
(boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x26) [0x8bcff6] 15: (PG::handle_activate_map(PG::RecoveryCtx*)+0xb4) [0x873304] 16: (OSD::advance_pg(unsigned int, PG*, ThreadPool::TPHandle&, PG::RecoveryCtx*, std::set<boost::intrusive_ptr<PG>, std::less<boost::intrusive_ptr<PG> >, std::allocator<boost::intrusive_ptr<PG> > >*)+0x77f) [0x775b1f] 17: (OSD::process_peering_events(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x31c) [0x77635c] 18: (OSD::PeeringWQ::_process(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x14) [0x7d3494] 19: (ThreadPool::worker(ThreadPool::WorkThread*)+0x68a) [0xb81b1a] 20: (ThreadPool::WorkThread::entry()+0x10) [0xb82d60] 21: (()+0x6b50) [0x7f1d5fcfcb50] 22: (clone()+0x6d) [0x7f1d5e920e6d] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.
I know there have been other reported issues with assertions being triggered in 0.80.6 (#9696), but I haven't seen any with actual segmentation faults.
Files
Actions