Neha Ojha wrote:
..., it seems like you have "enough copies available" to remove the problematic OSD but we won't be able to say for sure without looking at the PG state/logs.
Yes, we do have "enough copies available". In fact, after we brought the OSD running on the faulty disk (osd.13) down, the recovery completed.
The following logs are extracted with "journalctl -u ceph-e88d509a-f6fc-11ea-b25d-a0423f3ac864@osd.<id>.service --since '2021-12-12 22:15'" (My timezone is +8).
osd.22 (the crashing OSD):
debug 2021-12-12T14:19:29.813+0000 7fd7ae91a700 0 log_channel(cluster) log [WRN] : Error(s) ignored for 19:7c35c1c3:::2000852f9aa.00000000:head enough copies available
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.6/rpm/el8/BUILD/ceph-16.2.6/src/osd/ECBackend.cc: In function 'void ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)' thread 7fd7ae91a700 time 2021-12-12T14:19:29.839097+0000
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.6/rpm/el8/BUILD/ceph-16.2.6/src/osd/ECBackend.cc: 670: FAILED ceph_assert(pop.data.length() == sinfo.aligned_logical_offset_to_chunk_offset( after_progress.data_recovered_to - op.recovery_progress.data_recovered_to))
ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x158) [0x56017596a54c]
2: /usr/bin/ceph-osd(+0x56a766) [0x56017596a766]
3: (ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)+0x1b9e) [0x560175e2f7ae]
4: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple<unsigned long, unsigned long, std::map<pg_shard_t, ceph::buffer::v15_2_0::list, std::less<pg_shard_t>, std::allocator<std::pair<pg_shard_t const, ceph::buffer::v15_2_0::list> > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > >, RecoveryMessages*)+0x855) [0x560175e302d5]
5: (OnRecoveryReadComplete::finish(std::pair<RecoveryMessages*, ECBackend::read_result_t&>&)+0x71) [0x560175e53e91]
6: (ECBackend::complete_read_op(ECBackend::ReadOp&, RecoveryMessages*)+0x8f) [0x560175e22faf]
7: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&, RecoveryMessages*, ZTracer::Trace const&)+0x1196) [0x560175e3c106]
8: (ECBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x18f) [0x560175e3cbdf]
9: (PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x560175c42d12]
10: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x560175be5d6e]
11: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x560175a6f1b9]
12: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x560175ccc868]
13: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xa58) [0x560175a8f1e8]
14: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x5601760fa6c4]
15: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x5601760fd364]
16: /lib64/libpthread.so.0(+0x814a) [0x7fd7d062f14a]
17: clone()
*** Caught signal (Aborted) **
in thread 7fd7ae91a700 thread_name:tp_osd_tp
debug 2021-12-12T14:19:29.841+0000 7fd7ae91a700 -1 /home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.6/rpm/el8/BUILD/ceph-16.2.6/src/osd/ECBackend.cc: In function 'void ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)' thread 7fd7ae91a700 time 2021-12-12T14:19:29.839097+0000
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.6/rpm/el8/BUILD/ceph-16.2.6/src/osd/ECBackend.cc: 670: FAILED ceph_assert(pop.data.length() == sinfo.aligned_logical_offset_to_chunk_offset( after_progress.data_recovered_to - op.recovery_progress.data_recovered_to))
ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x158) [0x56017596a54c]
2: /usr/bin/ceph-osd(+0x56a766) [0x56017596a766]
3: (ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)+0x1b9e) [0x560175e2f7ae]
4: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple<unsigned long, unsigned long, std::map<pg_shard_t, ceph::buffer::v15_2_0::list, std::less<pg_shard_t>, std::allocator<std::pair<pg_shard_t const, ceph::buffer::v15_2_0::list> > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > >, RecoveryMessages*)+0x855) [0x560175e302d5]
5: (OnRecoveryReadComplete::finish(std::pair<RecoveryMessages*, ECBackend::read_result_t&>&)+0x71) [0x560175e53e91]
6: (ECBackend::complete_read_op(ECBackend::ReadOp&, RecoveryMessages*)+0x8f) [0x560175e22faf]
7: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&, RecoveryMessages*, ZTracer::Trace const&)+0x1196) [0x560175e3c106]
8: (ECBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x18f) [0x560175e3cbdf]
9: (PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x560175c42d12]
10: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x560175be5d6e]
11: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x560175a6f1b9]
12: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x560175ccc868]
13: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xa58) [0x560175a8f1e8]
14: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x5601760fa6c4]
15: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x5601760fd364]
16: /lib64/libpthread.so.0(+0x814a) [0x7fd7d062f14a]
17: clone()
ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable)
1: /lib64/libpthread.so.0(+0x12b20) [0x7fd7d0639b20]
2: gsignal()
3: abort()
4: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x56017596a59d]
5: /usr/bin/ceph-osd(+0x56a766) [0x56017596a766]
6: (ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)+0x1b9e) [0x560175e2f7ae]
7: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple<unsigned long, unsigned long, std::map<pg_shard_t, ceph::buffer::v15_2_0::list, std::less<pg_shard_t>, std::allocator<std::pair<pg_shard_t const, ceph::buffer::v15_2_0::list> > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > >, RecoveryMessages*)+0x855) [0x560175e302d5]
8: (OnRecoveryReadComplete::finish(std::pair<RecoveryMessages*, ECBackend::read_result_t&>&)+0x71) [0x560175e53e91]
9: (ECBackend::complete_read_op(ECBackend::ReadOp&, RecoveryMessages*)+0x8f) [0x560175e22faf]
10: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&, RecoveryMessages*, ZTracer::Trace const&)+0x1196) [0x560175e3c106]
11: (ECBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x18f) [0x560175e3cbdf]
12: (PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x560175c42d12]
13: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x560175be5d6e]
14: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x560175a6f1b9]
15: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x560175ccc868]
16: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xa58) [0x560175a8f1e8]
17: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x5601760fa6c4]
18: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x5601760fd364]
19: /lib64/libpthread.so.0(+0x814a) [0x7fd7d062f14a]
20: clone()
debug 2021-12-12T14:19:29.849+0000 7fd7ae91a700 -1 *** Caught signal (Aborted) **
in thread 7fd7ae91a700 thread_name:tp_osd_tp
ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable)
1: /lib64/libpthread.so.0(+0x12b20) [0x7fd7d0639b20]
2: gsignal()
3: abort()
4: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x56017596a59d]
5: /usr/bin/ceph-osd(+0x56a766) [0x56017596a766]
6: (ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)+0x1b9e) [0x560175e2f7ae]
7: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple<unsigned long, unsigned long, std::map<pg_shard_t, ceph::buffer::v15_2_0::list, std::less<pg_shard_t>, std::allocator<std::pair<pg_shard_t const, ceph::buffer::v15_2_0::list> > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > >, RecoveryMessages*)+0x855) [0x560175e302d5]
8: (OnRecoveryReadComplete::finish(std::pair<RecoveryMessages*, ECBackend::read_result_t&>&)+0x71) [0x560175e53e91]
9: (ECBackend::complete_read_op(ECBackend::ReadOp&, RecoveryMessages*)+0x8f) [0x560175e22faf]
10: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&, RecoveryMessages*, ZTracer::Trace const&)+0x1196) [0x560175e3c106]
11: (ECBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x18f) [0x560175e3cbdf]
12: (PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x560175c42d12]
13: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x560175be5d6e]
14: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x560175a6f1b9]
15: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x560175ccc868]
16: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xa58) [0x560175a8f1e8]
17: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x5601760fa6c4]
18: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x5601760fd364]
19: /lib64/libpthread.so.0(+0x814a) [0x7fd7d062f14a]
20: clone()
NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.
I think the event dump below does not contain useful information, but I can post it if needed.
# rados stat --pool cephfs.cephfs.data_ec 2000852f9aa.00000000
cephfs.cephfs.data_ec/2000852f9aa.00000000 mtime 2021-12-12T08:41:24.000000+0000, size 1049562
# ceph osd map cephfs.cephfs.data_ec 2000852f9aa.00000000
osdmap e53532 pool 'cephfs.cephfs.data_ec' (19) object '2000852f9aa.00000000' -> pg 19.c383ac3e (19.1e) -> up ([22,27,36,14,31,1,28,38,44,25], p22) acting ([22,27,36,14,31,1,28,38,44,25], p22)
Note: at the time of the crash, osd.13 was in the place of osd.27; this was before we brought osd.13 down.
osd.13 (the OSD that produced the read error):
debug 2021-12-12T14:17:20.047+0000 7fb7df73c700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0x31de54000~3000 (buffered) since 1109739.526614s, timeout is 5.000000s
debug 2021-12-12T14:17:20.047+0000 7fb7de73a700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0x1a44f1000~4000 (buffered) since 1109739.508168s, timeout is 5.000000s
debug 2021-12-12T14:17:20.059+0000 7fb7ddf39700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0xa8021ba000~1000 (buffered) since 1109739.525985s, timeout is 5.000000s
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread got r=-5 ((5) Input/output error)
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread translating the error to EIO for upper layer
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread got r=-5 ((5) Input/output error)
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread translating the error to EIO for upper layer
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread got r=-5 ((5) Input/output error)
debug 2021-12-12T14:19:29.731+0000 7fb7f5cc1700 -1 bdev(0x562ca0160400 /var/lib/ceph/osd/ceph-13/block) _aio_thread translating the error to EIO for upper layer
debug 2021-12-12T14:19:29.731+0000 7fb7dff3d700 0 bluestore(/var/lib/ceph/osd/ceph-13) log_latency slow operation observed for read, latency = 10.166154861s
debug 2021-12-12T14:19:29.731+0000 7fb7dff3d700 -1 log_channel(cluster) log [ERR] : Error -5 reading object 19:7c35c1c3:::2000852f9aa.00000000:head
debug 2021-12-12T14:19:29.739+0000 7fb7de73a700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0x180193000~2000 (buffered) since 1109867.356007s, timeout is 5.000000s
debug 2021-12-12T14:19:29.743+0000 7fb7df73c700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0x31df25000~3000 (buffered) since 1109867.354714s, timeout is 5.000000s
debug 2021-12-12T14:19:29.771+0000 7fb7ddf39700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0xa93e424000~4000 (buffered) since 1109867.356085s, timeout is 5.000000s
debug 2021-12-12T14:19:29.783+0000 7fb7def3b700 -1 bdev(0x562ca0160800 /var/lib/ceph/osd/ceph-13/block) read stalled read 0x105324d2000~2000 (buffered) since 1109868.356733s, timeout is 5.000000s
I randomly checked 3 other shards, and their logs do not contain relevant info — e.g., osd.31:
debug 2021-12-12T14:19:32.283+0000 7f005a51c700 1 osd.31 pg_epoch: 53118 pg[19.1s7( v 53117'72309 (52566'69024,53117'72309] local-lis/les=53107/53108 n=1968 ec=45495/45495 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=15.214573860s) [35,42,44,38,3,NONE,29,31,4,18]p35(0) r=7 lpr=53118 pi=[53107,53118)/1 luod=0'0 crt=53117'72309 mlcod 0'0 active pruub 5120667.000000000s@ mbc={}] start_peering_interval up [35,42,44,38,3,22,29,31,4,18] -> [35,42,44,38,3,2147483647,29,31,4,18], acting [35,42,44,38,3,22,29,31,4,18] -> [35,42,44,38,3,2147483647,29,31,4,18], acting_primary 35(0) -> 35, up_primary 35(0) -> 35, role 7 -> 7, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f005a51c700 1 osd.31 pg_epoch: 53118 pg[19.1s7( v 53117'72309 (52566'69024,53117'72309] local-lis/les=53107/53108 n=1968 ec=45495/45495 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=15.214481354s) [35,42,44,38,3,NONE,29,31,4,18]p35(0) r=7 lpr=53118 pi=[53107,53118)/1 crt=53117'72309 mlcod 0'0 unknown NOTIFY pruub 5120667.000000000s@ mbc={}] state<Start>: transitioning to Stray
debug 2021-12-12T14:19:32.283+0000 7f005ad1d700 1 osd.31 pg_epoch: 53118 pg[19.5s6( v 53111'72899 (52576'69907,53111'72899] local-lis/les=53107/53108 n=1979 ec=45495/45495 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=14.511599541s) [27,17,6,43,NONE,41,31,24,14,25]p27(0) r=6 lpr=53118 pi=[53107,53118)/1 luod=0'0 crt=53111'72899 mlcod 0'0 active pruub 5120666.500000000s@ mbc={}] start_peering_interval up [27,17,6,43,22,41,31,24,14,25] -> [27,17,6,43,2147483647,41,31,24,14,25], acting [27,17,6,43,22,41,31,24,14,25] -> [27,17,6,43,2147483647,41,31,24,14,25], acting_primary 27(0) -> 27, up_primary 27(0) -> 27, role 6 -> 6, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[5.20a( v 53117'4786450 (52645'4783228,53117'4786450] local-lis/les=53107/53108 n=255528 ec=29993/23 lis/c=53107/53107 les/c/f=53108/53109/0 sis=53118 pruub=7.012211323s) [31] r=0 lpr=53118 pi=[53107,53118)/1 crt=53117'4786450 lcod 53117'4786449 mlcod 53117'4786449 active pruub 5120659.000000000s@ mbc={255={}}] start_peering_interval up [31,22] -> [31], acting [31,22] -> [31], acting_primary 31 -> 31, up_primary 31 -> 31, role 0 -> 0, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f005ad1d700 1 osd.31 pg_epoch: 53118 pg[19.5s6( v 53111'72899 (52576'69907,53111'72899] local-lis/les=53107/53108 n=1979 ec=45495/45495 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=14.511482239s) [27,17,6,43,NONE,41,31,24,14,25]p27(0) r=6 lpr=53118 pi=[53107,53118)/1 crt=53111'72899 mlcod 0'0 unknown NOTIFY pruub 5120666.500000000s@ mbc={}] state<Start>: transitioning to Stray
debug 2021-12-12T14:19:32.283+0000 7f005ad1d700 1 osd.31 pg_epoch: 53118 pg[19.1es4( v 53117'70385 (52571'67510,53117'70385] local-lis/les=53108/53109 n=1896 ec=45495/45495 lis/c=53108/52205 les/c/f=53109/52206/0 sis=53118 pruub=9.612998962s) [NONE,27,36,14,31,1,28,38,44,25]/[NONE,13,36,14,31,1,28,38,44,25]p13(1) r=4 lpr=53118 pi=[52205,53118)/4 luod=0'0 crt=53117'70385 mlcod 0'0 active+remapped pruub 5120662.000000000s@ mbc={}] start_peering_interval up [22,27,36,14,31,1,28,38,44,25] -> [2147483647,27,36,14,31,1,28,38,44,25], acting [22,13,36,14,31,1,28,38,44,25] -> [2147483647,13,36,14,31,1,28,38,44,25], acting_primary 22(0) -> 13, up_primary 22(0) -> 27, role 4 -> 4, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[5.20a( v 53117'4786450 (52645'4783228,53117'4786450] local-lis/les=53107/53108 n=255528 ec=29993/23 lis/c=53107/53107 les/c/f=53108/53109/0 sis=53118 pruub=7.012211323s) [31] r=0 lpr=53118 pi=[53107,53118)/1 crt=53117'4786450 lcod 53117'4786449 mlcod 0'0 unknown pruub 5120659.000000000s@ mbc={}] state<Start>: transitioning to Primary
debug 2021-12-12T14:19:32.283+0000 7f005ad1d700 1 osd.31 pg_epoch: 53118 pg[19.1es4( v 53117'70385 (52571'67510,53117'70385] local-lis/les=53108/53109 n=1896 ec=45495/45495 lis/c=53108/52205 les/c/f=53109/52206/0 sis=53118 pruub=9.612906456s) [NONE,27,36,14,31,1,28,38,44,25]/[NONE,13,36,14,31,1,28,38,44,25]p13(1) r=4 lpr=53118 pi=[52205,53118)/4 crt=53117'70385 mlcod 0'0 remapped NOTIFY pruub 5120662.000000000s@ mbc={}] state<Start>: transitioning to Stray
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[5.cf( v 53117'4720646 (52740'4717793,53117'4720646] local-lis/les=53107/53108 n=255904 ec=2990/23 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=8.645213127s) [31] r=0 lpr=53118 pi=[53107,53118)/1 luod=0'0 lua=53102'4720614 crt=53117'4720646 lcod 53117'4720645 mlcod 0'0 active pruub 5120661.000000000s@ mbc={}] start_peering_interval up [22,31] -> [31], acting [22,31] -> [31], acting_primary 22 -> 31, up_primary 22 -> 31, role 1 -> 0, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[5.cf( v 53117'4720646 (52740'4717793,53117'4720646] local-lis/les=53107/53108 n=255904 ec=2990/23 lis/c=53107/53107 les/c/f=53108/53108/0 sis=53118 pruub=8.645213127s) [31] r=0 lpr=53118 pi=[53107,53118)/1 crt=53117'4720646 lcod 53117'4720645 mlcod 0'0 unknown pruub 5120661.000000000s@ mbc={}] state<Start>: transitioning to Primary
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[6.11( empty local-lis/les=53107/53108 n=0 ec=145/145 lis/c=53107/53107 les/c/f=53108/53108/34773 sis=53118 pruub=7.010226727s) [31,29] r=0 lpr=53118 pi=[53107,53118)/1 crt=0'0 mlcod 0'0 active pruub 5120659.000000000s@ mbc={}] start_peering_interval up [31,22,29] -> [31,29], acting [31,22,29] -> [31,29], acting_primary 31 -> 31, up_primary 31 -> 31, role 0 -> 0, features acting 4540138297136906239 upacting 4540138297136906239
debug 2021-12-12T14:19:32.283+0000 7f0059d1b700 1 osd.31 pg_epoch: 53118 pg[6.11( empty local-lis/les=53107/53108 n=0 ec=145/145 lis/c=53107/53107 les/c/f=53108/53108/34773 sis=53118 pruub=7.010226727s) [31,29] r=0 lpr=53118 pi=[53107,53118)/1 crt=0'0 mlcod 0'0 unknown pruub 5120659.000000000s@ mbc={}] state<Start>: transitioning to Primary