The main crash happens here:
void ECBackend::continue_recovery_op(
  RecoveryOp &op,
  RecoveryMessages *m)
{
  dout(10) << __func__ << ": continuing " << op << dendl;
  while (1) {
    switch (op.state) {
    // ...
    case RecoveryOp::READING: {
      // read completed, start write
      ceph_assert(op.xattrs.size());
      ceph_assert(op.returned_data.size());
      op.state = RecoveryOp::WRITING;
      ObjectRecoveryProgress after_progress = op.recovery_progress;
      after_progress.data_recovered_to += op.extent_requested.second;
      after_progress.first = false;
      if (after_progress.data_recovered_to >= op.obc->obs.oi.size) {
        after_progress.data_recovered_to =
          sinfo.logical_to_next_stripe_offset(
            op.obc->obs.oi.size);
        after_progress.data_complete = true;
      }
      for (set<pg_shard_t>::iterator mi = op.missing_on.begin();
           mi != op.missing_on.end();
           ++mi) {
        ceph_assert(op.returned_data.count(mi->shard));
        m->pushes[*mi].push_back(PushOp());
        PushOp &pop = m->pushes[*mi].back();
        pop.soid = op.hoid;
        pop.version = op.v;
        pop.data = op.returned_data[mi->shard];
        dout(10) << __func__ << ": before_progress=" << op.recovery_progress
                 << ", after_progress=" << after_progress
                 << ", pop.data.length()=" << pop.data.length()
                 << ", size=" << op.obc->obs.oi.size << dendl;
        ceph_assert(
          pop.data.length() ==
          sinfo.aligned_logical_offset_to_chunk_offset(
            after_progress.data_recovered_to -
            op.recovery_progress.data_recovered_to)
          );
        // ...
The debug message emitted right before the assert would shed much, much more light:

dout(10) << __func__ << ": before_progress=" << op.recovery_progress
         << ", after_progress=" << after_progress
         << ", pop.data.length()=" << pop.data.length()
         << ", size=" << op.obc->obs.oi.size << dendl;
Unfortunately, it has not been recorded in the logs (it's emitted at debug level 10). Do we have a coredump by any chance?
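For context on what the assert actually checks: it is pure stripe arithmetic. Below is a minimal standalone sketch of the two stripe_info_t helpers involved, modelled on src/osd/ECUtil.h; the k=5 profile, the 4 KiB chunk size, and all concrete numbers are assumptions for illustration, not values taken from this cluster.

#include <cassert>
#include <cstdint>
#include <iostream>

// Simplified re-implementation of the ECUtil::stripe_info_t helpers
// referenced by the crashing code above.
struct stripe_info {
  uint64_t chunk_size;    // bytes each shard contributes per stripe
  uint64_t stripe_width;  // k * chunk_size: logical bytes per stripe

  // Round a logical offset up to the next stripe boundary.
  uint64_t logical_to_next_stripe_offset(uint64_t off) const {
    return off % stripe_width ? off - off % stripe_width + stripe_width : off;
  }
  // Map a stripe-aligned logical offset to the per-shard chunk offset.
  uint64_t aligned_logical_offset_to_chunk_offset(uint64_t off) const {
    assert(off % stripe_width == 0);
    return off / stripe_width * chunk_size;
  }
};

int main() {
  stripe_info sinfo{4096, 5 * 4096};               // assumed: k=5, 4 KiB chunks

  uint64_t before = 0;                             // recovery_progress.data_recovered_to
  uint64_t requested = 1024 * sinfo.stripe_width;  // extent_requested.second (stripe-aligned)
  uint64_t after = before + requested;

  uint64_t oi_size = 10 * sinfo.stripe_width + 1;  // hypothetical object size
  if (after >= oi_size)                            // clamp, as continue_recovery_op() does
    after = sinfo.logical_to_next_stripe_offset(oi_size);

  // The assert demands, for every missing shard:
  //   pop.data.length() == chunk-space length of [before, after)
  uint64_t expected = sinfo.aligned_logical_offset_to_chunk_offset(after - before);
  std::cout << "each push must carry exactly " << expected << " bytes\n";
  return 0;
}

So the abort means some shard's returned_data was shorter (or longer) than the chunk-space projection of the logical extent recovered in this round, e.g. after a short read from one shard.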
In the meantime: the crash happened at the end of reading the 3:7c000065:::rbd_data.4.66c0ce6b3ae46.00000000000049f1:head
object. The read had been performed for the sake of backfilling:
$ less logs/osds/balin012/ceph-osd.138.log.3
...
-21> 2024-01-26T19:53:02.149+0100 7f1d283776c0 5 osd.138 pg_epoch: 23531 pg[3.3es1( v 22804'289525 (22661'279455,22804'289525] local-lis/les=22986/22987 n=28777 ec=22926/13930 lis/c=22986/22926 les/c/f=22987
/22927/0 sis=23530) [NONE,138,152,64,34,84,47]/[NONE,138,152,64,34,NONE,47]p138(1) backfill=[84(5)] r=1 lpr=23530 pi=[22926,23530)/2 crt=22804'289525 lcod 0'0 mlcod 0'0 undersized+degraded+remapped+backfill_wait
+peered mbc={}] enter Started/Primary/Active/Backfilling
-6> 2024-01-26T19:53:02.189+0100 7f1d283776c0 5 osd.138 pg_epoch: 23531 pg[3.3es1( v 22804'289525 (22661'279455,22804'289525] local-lis/les=22986/22987 n=28777 ec=22926/13930 lis/c=22986/22926 les/c/f=22987/22927/0 sis=23530) [NONE,138,152,64,34,84,47]/[NONE,138,152,64,34,NONE,47]p138(1) backfill=[84(5)] r=1 lpr=23530 pi=[22926,23530)/2 crt=22804'289525 lcod 0'0 mlcod 0'0 undersized+degraded+remapped+backfilling+peered rops=1 mbc={}] backfill_pos is MIN
-5> 2024-01-26T19:53:02.189+0100 7f1d283776c0 5 osd.138 pg_epoch: 23531 pg[3.3es1( v 22804'289525 (22661'279455,22804'289525] local-lis/les=22986/22987 n=28777 ec=22926/13930 lis/c=22986/22926 les/c/f=22987/22927/0 sis=23530) [NONE,138,152,64,34,84,47]/[NONE,138,152,64,34,NONE,47]p138(1) backfill=[84(5)] r=1 lpr=23530 pi=[22926,23530)/2 crt=22804'289525 lcod 0'0 mlcod 0'0 undersized+degraded+remapped+backfilling+peered rops=1 mbc={}] backfill_pos is 3:7c000065:::rbd_data.4.66c0ce6b3ae46.00000000000049f1:head
-1> 2024-01-26T19:53:02.269+0100 7f1d283776c0 -1 ./src/osd/ECBackend.cc: In function 'void ECBackend::continue_recovery_op(RecoveryOp&, RecoveryMessages*)' thread 7f1d283776c0 time 2024-01-26T19:53:02.259931+0100
0> 2024-01-26T19:53:02.277+0100 7f1d283776c0 -1 *** Caught signal (Aborted) **
in thread 7f1d283776c0 thread_name:tp_osd_tp
7f1d283776c0 / tp_osd_tp
What's interesting is the fact that another OSD failed on exactly the same object, but at a different place, namely while decoding the chunks:
$ less logs/osds/balin018/ceph-osd.206.log.1
...
-7> 2024-01-26T19:15:47.633+0100 7f0fe36dc6c0 5 osd.206 pg_epoch: 23172 pg[3.3es0( v 22804'289525 (22661'279455,22804'289525] local-lis/les=22986/22987 n=28777 ec=22926/13930 lis/c=22986/22926 les/c/f=22987
/22927/0 sis=23171) [206,138,154,64,33,84,47]/[206,138,NONE,64,NONE,NONE,47]p206(0) backfill=[33(4),84(5),154(2)] r=0 lpr=23171 pi=[22926,23171)/2 crt=22804'289525 lcod 0'0 mlcod 0'0 undersized+degraded+remapped+backfilling+peered rops=1 mbc={}] backfill_pos is 3:7c000065:::rbd_data.4.66c0ce6b3ae46.00000000000049f1:head
-6> 2024-01-26T19:15:47.633+0100 7f0fef6f46c0 3 osd.206 23172 handle_osd_map epochs [23172,23172], i have 23172, src has [22554,23172]
-5> 2024-01-26T19:15:47.741+0100 7f10007386c0 10 monclient: get_auth_request con 0x556aff6a8c00 auth_method 0
-4> 2024-01-26T19:15:47.741+0100 7f10007386c0 10 monclient: handle_auth_request added challenge on 0x556aff6a8400
-3> 2024-01-26T19:15:47.741+0100 7f0fef6f46c0 2 osd.206 23172 ms_handle_reset con 0x556aff6a8400 session 0x556af4d14f00
-2> 2024-01-26T19:15:47.745+0100 7f0ff1ef96c0 5 prioritycache tune_memory target: 4294967296 mapped: 318676992 unmapped: 638656512 heap: 957333504 old mem: 2845415832 new mem: 2845415832
-1> 2024-01-26T19:15:47.793+0100 7f0fe36dc6c0 -1 ./src/osd/ECUtil.cc: In function 'int ECUtil::decode(const stripe_info_t&, ceph::ErasureCodeInterfaceRef&, std::map<int, ceph::buffer::v15_2_0::list>&, std::map<int, ceph::buffer::v15_2_0::list*>&)' thread 7f0fe36dc6c0 time 2024-01-26T19:15:47.781339+0100
./src/osd/ECUtil.cc: 110: FAILED ceph_assert(r == 0)
ceph version 17.2.7 (2dd3854d5b35a35486e86e2616727168e244f470) quincy (stable)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x12a) [0x556ac3c8b32b]
2: /usr/bin/ceph-osd(+0x5894c6) [0x556ac3c8b4c6]
3: (ECUtil::decode(ECUtil::stripe_info_t const&, std::shared_ptr<ceph::ErasureCodeInterface>&, std::map<int, ceph::buffer::v15_2_0::list, std::less<int>, std::allocator<std::pair<int const, ceph::buffer::v15_2_0::list> > >&, std::map<int, ceph::buffer::v15_2_0::list*, std::less<int>, std::allocator<std::pair<int const, ceph::buffer::v15_2_0::list*> > >&)+0x842) [0x556ac4072102]
4: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple<unsigned long, unsigned long, std::map<pg_shard_t, ceph::buffer::v15_2_0::list, std::less<pg_shard_t>, std::allocator<std::pair<pg_shard_t const, ceph::buffer::v15_2_0::list> > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > >, RecoveryMessages*)+0x483) [0x556ac41b1013]
5: (OnRecoveryReadComplete::finish(std::pair<RecoveryMessages*, ECBackend::read_result_t&>&)+0x6d) [0x556ac41d394d]
6: (ECBackend::complete_read_op(ECBackend::ReadOp&, RecoveryMessages*)+0x8f) [0x556ac41a135f]
7: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&, RecoveryMessages*, ZTracer::Trace const&)+0xcc6) [0x556ac41b9bb6]
8: (ECBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x2ad) [0x556ac41ba5ad]
9: (PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x45) [0x556ac3f8bd05]
10: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x508) [0x556ac3f2ce98]
11: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x199) [0x556ac3d9eb89]
12: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x65) [0x556ac4075c95]
13: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x628) [0x556ac3db5718]
14: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x3f4) [0x556ac44a4ac4]
15: (ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x556ac44a7850]
16: /lib/x86_64-linux-gnu/libc.so.6(+0x89044) [0x7f1001aa8044]
17: /lib/x86_64-linux-gnu/libc.so.6(+0x10961c) [0x7f1001b2861c]
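For reference, the ceph_assert(r == 0) at ECUtil.cc:110 wraps the return code of the erasure-code plugin's decode. Below is a deliberately simplified stand-in (fake_decode is hypothetical, not the real ErasureCodeInterface API) showing the most common way that return code goes non-zero: a chunk whose length disagrees with the expected chunk size.

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <map>
#include <vector>

using chunk_t = std::vector<uint8_t>;

// Hypothetical stand-in for the plugin's decode; a real plugin would
// reconstruct the missing shards, but like this stub it refuses inputs
// whose chunk lengths are inconsistent with the expected chunk size.
static int fake_decode(const std::map<int, chunk_t>& have, uint64_t chunk_size) {
  for (const auto& [shard, data] : have)
    if (data.size() != chunk_size)
      return -EINVAL;  // inconsistent chunk -> non-zero return
  return 0;
}

int main() {
  const uint64_t chunk_size = 4096;    // assumed, not from this cluster
  std::map<int, chunk_t> have{
    {0, chunk_t(chunk_size)},
    {1, chunk_t(chunk_size / 2)},      // a short chunk from a bad shard
  };
  int r = fake_decode(have, chunk_size);
  assert(r == 0);  // the ECUtil.cc:110 analogue: aborts on the short chunk
  return 0;
}

Under that reading, both aborts would be consistent with a single shard returning short or corrupt data for this object: osd.206 tripped over it while decoding, osd.138 over the resulting length mismatch when building the pushes.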
To summarize (as the post is pretty long): do we have a coredump by any chance?