Project

General

Profile

Actions

Bug #55995

open

OSD Crash: /lib64/libpthread.so.0(+0x12ce0) [0x7f94cdcbbce0]

Added by Kilian Ries almost 2 years ago.

Status:
New
Priority:
Normal
Assignee:
-
Category:
-
Target version:
-
% Done:

0%

Source:
Community (user)
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

Hi,

I recently upgraded my Ceph cluster from 14.2.x to 16.2.7 and switched to a Docker-based deployment. Since then, I have been seeing random crashes in my OSD containers:

```
$ ceph crash info 2022-06-07T05:43:32.486598Z_4926b993-d7c6-482e-9f85-d129d01fb139
{
"archived": "2022-06-08 13:44:18.320037",
"backtrace": [
"/lib64/libpthread.so.0(+0x12ce0) [0x7f94cdcbbce0]",
"gsignal()",
"abort()",
"/lib64/libstdc++.so.6(+0x9009b) [0x7f94cd2d209b]",
"/lib64/libstdc++.so.6(+0x9653c) [0x7f94cd2d853c]",
"/lib64/libstdc++.so.6(+0x96597) [0x7f94cd2d8597]",
"__cxa_rethrow()",
"(std::_Hashtable<osd_reqid_t, std::pair<osd_reqid_t const, pg_log_dup_t*>, std::allocator<std::pair<osd_reqid_t const, pg_log_dup_t*> >, std::__detail::_Select1st, std::equal_to<osd_reqid_t>, std::hash<osd_reqid_t>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true> >::_M_insert_unique_node(unsigned long, unsigned long, std::__detail::_Hash_node<std::pair<osd_reqid_t const, pg_log_dup_t*>, true>*, unsigned long)+0x101) [0x55f220a66121]",
"(PGLog::IndexedLog::index(unsigned short) const+0x2f2) [0x55f220a6a322]",
"(PGLog::proc_replica_log(pg_info_t&, pg_log_t const&, pg_missing_set<false>&, pg_shard_t) const+0x68f) [0x55f220a7f06f]",
"(PeeringState::proc_replica_log(pg_info_t&, pg_log_t const&, pg_missing_set<false>&&, pg_shard_t)+0x69) [0x55f220c317e9]",
"(PeeringState::GetMissing::react(MLogRec const&)+0x71) [0x55f220c43cf1]",
"(boost::statechart::simple_state<PeeringState::GetMissing, PeeringState::Peering, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x95) [0x55f220c7c1e5]",
"(boost::statechart::state_machine<PeeringState::PeeringMachine, PeeringState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x5b) [0x55f220a62f6b]",
"(PG::do_peering_event(std::shared_ptr<PGPeeringEvent>, PeeringCtx&)+0x2d1) [0x55f220a579e1]",
"(OSD::dequeue_peering_evt(OSDShard*, PG*, std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)+0x29c) [0x55f2209d1e5c]",
"(ceph::osd::scheduler::PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x56) [0x55f220c010e6]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xc28) [0x55f2209c3d48]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x55f2210355b4]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x55f221038254]",
"/lib64/libpthread.so.0(+0x81cf) [0x7f94cdcb11cf]",
"clone()"
],
"ceph_version": "16.2.7",
"crash_id": "2022-06-07T05:43:32.486598Z_4926b993-d7c6-482e-9f85-d129d01fb139",
"entity_name": "osd.42",
"os_id": "centos",
"os_name": "CentOS Stream",
"os_version": "8",
"os_version_id": "8",
"process_name": "ceph-osd",
"stack_sig": "3d99c77943507c160e29d62348603cc09ae49e8d224c2f6c2e02b079bb704f85",
"timestamp": "2022-06-07T05:43:32.486598Z",
"utsname_hostname": "ceph-osd04.intern.customer-virt.eu",
"utsname_machine": "x86_64",
"utsname_release": "3.10.0-1160.62.1.el7.x86_64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Tue Apr 5 16:57:59 UTC 2022"
}
```

```
$ ceph crash info 2022-06-01T09:19:08.664119Z_90bb91ce-b2c0-40fd-8159-8a2875632641
{
"archived": "2022-06-01 11:40:18.674930",
"backtrace": [
"/lib64/libpthread.so.0(+0x12ce0) [0x7fb9769e3ce0]",
"(std::__detail::_List_node_base::_M_unhook()+0xe) [0x7fb976008dde]",
"(PGLog::IndexedLog::trim(ceph::common::CephContext*, eversion_t, std::set<eversion_t, std::less<eversion_t>, std::allocator<eversion_t> >*, std::set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*, eversion_t*)+0xaa9) [0x55945a2057c9]",
"(PGLog::trim(eversion_t, pg_info_t&, bool, bool)+0x20b) [0x55945a2065bb]",
"(PeeringState::append_log(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&&, eversion_t, eversion_t, eversion_t, ceph::os::Transaction&, bool, bool)+0x198) [0x55945a3c25b8]",
"(non-virtual thunk to PrimaryLogPG::log_operation(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&&, std::optional<pg_hit_set_history_t> const&, eversion_t const&, eversion_t const&, eversion_t const&, bool, ceph::os::Transaction&, bool)+0xb8) [0x55945a2e9888]",
"(ReplicatedBackend::do_repop(boost::intrusive_ptr<OpRequest>)+0xd0e) [0x55945a4bc93e]",
"(ReplicatedBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x267) [0x55945a4cce47]",
"(PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x55945a2ff7d2]",
"(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x55945a2a295e]",
"(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x55945a12b8e9]",
"(ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x55945a389838]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xc28) [0x55945a14bd48]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x55945a7bd5b4]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x55945a7c0254]",
"/lib64/libpthread.so.0(+0x81cf) [0x7fb9769d91cf]",
"clone()"
],
"ceph_version": "16.2.7",
"crash_id": "2022-06-01T09:19:08.664119Z_90bb91ce-b2c0-40fd-8159-8a2875632641",
"entity_name": "osd.2",
"os_id": "centos",
"os_name": "CentOS Stream",
"os_version": "8",
"os_version_id": "8",
"process_name": "ceph-osd",
"stack_sig": "7e952eb1832afe8d7fdd8c6b0fcb0ca3af8f1410a148bf4826aef6a8ef3723aa",
"timestamp": "2022-06-01T09:19:08.664119Z",
"utsname_hostname": "ceph-osd01.intern.customer-virt.eu",
"utsname_machine": "x86_64",
"utsname_release": "3.10.0-1160.62.1.el7.x86_64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Tue Apr 5 16:57:59 UTC 2022"
}
```

Regards,
Kilian

No data to display

Actions

Also available in: Atom PDF