Project

General

Profile

Actions

Bug #51168

open

ceph-osd state machine crash during the peering process

Added by Yao Ning almost 3 years ago. Updated over 1 year ago.

Status:
New
Priority:
Normal
Category:
Peering
Target version:
-
% Done:

0%

Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
OSD
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

[root@ceph-128 ~]# ceph crash info 2021-06-10_15:30:01.935970Z_f5d8ab5b-8613-408b-ac22-75425c5cf672
{
    "os_version_id": "7", 
    "assert_condition": "abort", 
    "utsname_release": "3.10.0-1127.el7.x86_64", 
    "os_name": "CentOS Linux", 
    "entity_name": "osd.43", 
    "assert_file": "/builddir/build/BUILD/ceph-14.2.18/src/osd/PG.cc", 
    "timestamp": "2021-06-10 15:30:01.935970Z", 
    "process_name": "ceph-osd", 
    "utsname_machine": "x86_64", 
    "assert_line": 7274, 
    "utsname_sysname": "Linux", 
    "os_version": "7 (Core)", 
    "os_id": "centos", 
    "assert_thread_name": "tp_osd_tp", 
    "utsname_version": "#1 SMP Tue Mar 31 23:36:51 UTC 2020", 
    "backtrace": [
        "(()+0xf630) [0x7f0101be1630]", 
        "(gsignal()+0x37) [0x7f01009d4387]", 
        "(abort()+0x148) [0x7f01009d5a78]", 
        "(ceph::__ceph_abort(char const*, int, char const*, std::string const&)+0x1a5) [0x55859f824b98]", 
        "(PG::RecoveryState::Crashed::Crashed(boost::statechart::state<PG::RecoveryState::Crashed, PG::RecoveryState::RecoveryMachine, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::my_context)+0xc3) [0x55859f9d5643]", 
        "(boost::statechart::state<PG::RecoveryState::Crashed, PG::RecoveryState::RecoveryMachine, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::deep_construct(boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>* const&, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>&)+0x36) [0x55859fa2b4e6]", 
        "(boost::statechart::simple_state<PG::RecoveryState::ReplicaActive, PG::RecoveryState::Started, PG::RecoveryState::RepNotRecovering, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x1d6) [0x55859fa2be26]", 
        "(boost::statechart::simple_state<PG::RecoveryState::RepNotRecovering, PG::RecoveryState::ReplicaActive, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0xd3) [0x55859fa2b483]", 
        "(PG::do_peering_event(std::shared_ptr<PGPeeringEvent>, PG::RecoveryCtx*)+0x2dd) [0x55859f9efe0d]", 
        "(OSD::dequeue_peering_evt(OSDShard*, PG*, std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)+0x1b4) [0x55859f92c2c4]", 
        "(PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x51) [0x55859fb94951]", 
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x914) [0x55859f920d84]", 
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b6) [0x55859fedcfe6]", 
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55859fedfb00]", 
        "(()+0x7ea5) [0x7f0101bd9ea5]", 
        "(clone()+0x6d) [0x7f0100a9c96d]" 
    ], 
    "utsname_hostname": "ceph-138", 
    "assert_msg": "/builddir/build/BUILD/ceph-14.2.18/src/osd/PG.cc: In function 'PG::RecoveryState::Crashed::Crashed(boost::statechart::state<PG::RecoveryState::Crashed, PG::RecoveryState::RecoveryMachine>::my_context)' thread 7f00daa4b700 time 2021-06-10 23:30:01.927379\n/builddir/build/BUILD/ceph-14.2.18/src/osd/PG.cc: 7274: ceph_abort_msg(\"we got a bad state machine event\")\n", 
    "crash_id": "2021-06-10_15:30:01.935970Z_f5d8ab5b-8613-408b-ac22-75425c5cf672", 
    "assert_func": "PG::RecoveryState::Crashed::Crashed(boost::statechart::state<PG::RecoveryState::Crashed, PG::RecoveryState::RecoveryMachine>::my_context)", 
    "ceph_version": "14.2.18" 
}


Files

ceph-osd.56.log-20210618.gz (319 KB) ceph-osd.56.log-20210618.gz Yao Ning, 07/06/2022 10:47 AM
ceph-osd.30.zip (505 KB) ceph-osd.30.zip Yao Ning, 08/22/2022 06:10 PM
Actions

Also available in: Atom PDF