Actions
Bug #50101
open: unhandled event in ReplicaActive
Status:
New
Priority:
Normal
Assignee:
-
Category:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Pull request ID:
Crash signature (v1):
9aa3c57e050d0d976cd83938712756b1e31a68adf250f406e06afa1d413c016f
Crash signature (v2):
Description
{ "assert_condition": "abort", "assert_file": "/home/jenkins-build/build/workspace/ceph-dev-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.1.0-1323-g7e7e1f4e/rpm/el8/BUILD/ceph-16.1.0-1323-g7e7e1f4e/src/osd/PeeringState.cc", "assert_func": "PeeringState::Crashed::Crashed(boost::statechart::state<PeeringState::Crashed, PeeringState::PeeringMachine>::my_context)", "assert_line": 4594, "assert_msg": "/home/jenkins-build/build/workspace/ceph-dev-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.1.0-1323-g7e7e1f4e/rpm/el8/BUILD/ceph-16.1.0-1323-g7e7e1f4e/src/osd/PeeringState.cc: In function 'PeeringState::Crashed::Crashed(boost::statechart::state<PeeringState::Crashed, PeeringState::PeeringMachine>::my_context)' thread 7f84ffe66700 time 2021-03-30T23:11:31.552274+0000\n/home/jenkins-build/build/workspace/ceph-dev-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.1.0-1323-g7e7e1f4e/rpm/el8/BUILD/ceph-16.1.0-1323-g7e7e1f4e/src/osd/PeeringState.cc: 4594: ceph_abort_msg(\"we got a bad state machine event\")\n", "assert_thread_name": "tp_osd_tp", "backtrace": [ "/lib64/libpthread.so.0(+0x12b20) [0x7f8521f0eb20]", "gsignal()", "abort()", "(ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x1b6) [0x55b41d947509]", "(PeeringState::Crashed::Crashed(boost::statechart::state<PeeringState::Crashed, PeeringState::PeeringMachine, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::my_context)+0xc4) [0x55b41dceb554]", 
"(boost::statechart::state<PeeringState::Crashed, PeeringState::PeeringMachine, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::deep_construct(boost::statechart::state_machine<PeeringState::PeeringMachine, PeeringState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>* const&, boost::statechart::state_machine<PeeringState::PeeringMachine, PeeringState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>&)+0x3a) [0x55b41dd214fa]", "(boost::statechart::simple_state<PeeringState::ReplicaActive, PeeringState::Started, PeeringState::RepNotRecovering, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x2e3) [0x55b41dd226d3]", "(boost::statechart::simple_state<PeeringState::RepRecovering, PeeringState::ReplicaActive, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x99) [0x55b41dd206b9]", "(boost::statechart::state_machine<PeeringState::PeeringMachine, PeeringState::Initial, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x5b) [0x55b41db0a21b]", "(PG::do_peering_event(std::shared_ptr<PGPeeringEvent>, PeeringCtx&)+0x2d1) [0x55b41dafebf1]", "(OSD::dequeue_peering_evt(OSDShard*, PG*, 
std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)+0x29c) [0x55b41da792cc]", "(ceph::osd::scheduler::PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x56) [0x55b41dca7ae6]", "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xa58) [0x55b41da6b048]", "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x55b41e0d2494]", "(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x55b41e0d5134]", "/lib64/libpthread.so.0(+0x814a) [0x7f8521f0414a]", "clone()" ], "ceph_version": "16.1.0-1323-g7e7e1f4e", "crash_id": "2021-03-30T23:11:32.528564Z_ecb4c036-5628-4abf-ab37-a205037feaf2", "entity_name": "osd.65", "os_id": "centos", "os_name": "CentOS Linux", "os_version": "8", "os_version_id": "8", "process_name": "ceph-osd", "stack_sig": "9aa3c57e050d0d976cd83938712756b1e31a68adf250f406e06afa1d413c016f", "timestamp": "2021-03-30T23:11:32.528564Z", "utsname_hostname": "mira093", "utsname_machine": "x86_64", "utsname_release": "5.4.0-66-generic", "utsname_sysname": "Linux", "utsname_version": "#74~18.04.2-Ubuntu SMP Fri Feb 5 11:17:31 UTC 2021" }
Files
Updated by Neha Ojha about 3 years ago
-2219> 2021-03-30T23:10:27.629+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575944 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a883f:::1000ef5fccb.00000000:head local-lis/les=10573977/10573978 n=46366 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepNotRecovering -2093> 2021-03-30T23:10:30.249+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575946 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a8b66:::1000f206522.00000000:head local-lis/les=10573977/10573978 n=46371 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepNotRecovering 2.620139 8 0.000079 -2092> 2021-03-30T23:10:30.249+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575946 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a8b66:::1000f206522.00000000:head local-lis/les=10573977/10573978 n=46371 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepWaitBackfillReserved -2091> 2021-03-30T23:10:30.249+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575946 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a8b66:::1000f206522.00000000:head local-lis/les=10573977/10573978 n=46371 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 
lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepWaitBackfillReserved 0.000038 1 0.000188 -2090> 2021-03-30T23:10:30.249+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575946 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a8b66:::1000f206522.00000000:head local-lis/les=10573977/10573978 n=46371 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepRecovering -2062> 2021-03-30T23:10:31.257+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575947 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepRecovering 1.007368 4 0.000055 -2061> 2021-03-30T23:10:31.257+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575947 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepNotRecovering -26> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 
lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepNotRecovering 60.237929 5 0.000072 -25> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepWaitBackfillReserved -24> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepWaitBackfillReserved 0.000078 2 0.000130 -23> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepNotRecovering -22> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 
10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepNotRecovering 0.000195 1 0.000033 -21> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] enter Started/ReplicaActive/RepWaitBackfillReserved -18> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepWaitBackfillReserved 0.000214 1 0.000069 -17> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 
active+remapped mbc={}] enter Started/ReplicaActive/RepRecovering -16> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive/RepRecovering 0.000018 1 0.000041 -15> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 10575941'3129523 active+remapped mbc={}] exit Started/ReplicaActive 378.850507 0 0.000000 -14> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 0'0 active+remapped mbc={}] exit Started 383.384446 0 0.000000 -13> 2021-03-30T23:11:31.493+0000 7f84ffe66700 5 osd.65 pg_epoch: 10575949 pg[114.57( v 10575941'3129534 (10575152'3126961,10575941'3129534] lb 114:ea7a9cb0:::1000ef1f65b.00000000:head local-lis/les=10573977/10573978 n=46406 ec=5887724/4421456 lis/c=10573977/10573790 les/c/f=10573978/10573794/9814718 sis=10575818) [83,65,8]/[83,40,125] r=-1 lpr=10575818 
pi=[10573790,10575818)/3 luod=0'0 lua=10575817'3129484 crt=10575941'3129534 mlcod 0'0 active+remapped mbc={}] enter Crashed
Updated by Neha Ojha about 3 years ago
- Subject changed from PeeringState::Crashed::Crashed to unhandled event in ReplicaActive
This seems to be an unhandled event in the ReplicaActive state; I couldn't find much in the logs that tells us what that event was.
Actions