Actions
Bug #57578
[closed] crimson: assertion failure in _do_transaction_step()
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Description
Cluster based on dc9b89d619920da9b69b72e80ffdf057f865be50
deployed with:
MDS=0 MGR=1 OSD=1 MON=1 ../src/vstart.sh -n --crimson --seastore --nolockdep --nodaemon --redirect-output --without-dashboard --no-restart -o "debug_objclass=20" -o "debug_osd=20" -o "debug_none=20"
explodes during:
CRIMSON_COMPAT=true RBD_FEATURES=1 bin/ceph_test_librbd ... [ OK ] TestLibRBD.TestIOPPWithIOHint (17868 ms) [ RUN ] TestLibRBD.TestIOToSnapshot using new format! num snaps is: 0 expected: 0 num snaps is: 1 expected: 1 snap: orig found orig with size 2097152 read: 80 write test data!
On all the OSDs:
DEBUG 2022-09-16 13:59:04,001 [shard 0] osd - write_log_and_missing with: dirty_to: 0'0, dirty_from: 4294967295'18446744073709551615, writeout_from: 119'33, trimmed: , trimmed_dups: , clear_divergent_priors: 0 DEBUG 2022-09-16 13:59:04,001 [shard 0] osd - final snapset 18={}:{18={18}} in 2:c5e87adb:::rbd_data.101d3949a6b0.0000000000000000:head DEBUG 2022-09-16 13:59:04,001 [shard 0] osd - ReplicatedBackend::_submit_transaction: do_transaction... ERROR 2022-09-16 13:59:04,001 [shard 0] none - ../src/crimson/os/seastore/seastore.cc:1345 : In function 'crimson::os::seastore::SeaStore::_do_transaction_step(crimson::os::seastore::SeaStore::internal_context_t&, crimson::os::FuturizedStore::CollectionRef&, std::vector<boost::intrusive_ptr<crimson::os::seastore::Onode> >&, std::vector<boost::intrusive_ptr<crimson::os::seastore::Onode> >&, ceph::os::Transaction::iterator&)::<lambda()>', abort(%s) unexpected enoent error
Backtrace:
0# gsignal in /lib64/libc.so.6 1# abort in /lib64/libc.so.6 2# ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) at /home/rzarzynski/ceph1/build/../src/seastar/include/seastar/util/log.hh:106 3# _ZN7seastar15futurize_invokeIZN7crimsonL8composerINS1_9erroratorIJNS1_19unthrowable_wrapperIRKSt10error_codeL_ZNS1_2ecILi5EEEEEEEE12pass_furtherEJZNS4_IS7_L_ZNS8_ILi2EEEEE6handleIZNS1_2os8seastore8SeaStore20_do_transaction_stepERNSG_18internal_context_tERN5boost13intrusive_ptrINSE_19FuturizedCollectionEEERSt6vectorINSK_INSF_5OnodeEEESaISQ_EEST_RN4ceph2os11Transaction8iteratorEEUlvE0_EEDaOT_EUlRKSC_E_NS1_8ct_error10assert_allEEEEDaS11_DpOT0_EUlDpOT_E_JS12_EEEDaS11_S19_ at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/basic_string.h:672 4# _ZN7crimson13interruptible8internal27call_with_interruption_implINS_2os8seastore28TransactionConflictConditionEZNS_L8composerINS_9erroratorIJNS_19unthrowable_wrapperIRKSt10error_codeL_ZNS_2ecILi5EEEEEEEE12pass_furtherEJZNS8_ISB_L_ZNSC_ILi2EEEEE6handleIZNS4_8SeaStore20_do_transaction_stepERNSI_18internal_context_tERN5boost13intrusive_ptrINS3_19FuturizedCollectionEEERSt6vectorINSM_INS4_5OnodeEEESaISS_EESV_RN4ceph2os11Transaction8iteratorEEUlvE0_EEDaOT_EUlRKSG_E_NS_8ct_error10assert_allEEEEDaS13_DpOT0_EUlDpOT_E_JS14_EEEDaN7seastar13lw_shared_ptrIS12_EEOT0_DpOT1_ at /home/rzarzynski/ceph1/build/../src/crimson/common/interruptible_future.h:193 5# 
_ZN7crimson13interruptible22call_with_interruptionINS_2os8seastore28TransactionConflictConditionEZNS_L8composerINS_9erroratorIJNS_19unthrowable_wrapperIRKSt10error_codeL_ZNS_2ecILi5EEEEEEEE12pass_furtherEJZNS7_ISA_L_ZNSB_ILi2EEEEE6handleIZNS3_8SeaStore20_do_transaction_stepERNSH_18internal_context_tERN5boost13intrusive_ptrINS2_19FuturizedCollectionEEERSt6vectorINSL_INS3_5OnodeEEESaISR_EESU_RN4ceph2os11Transaction8iteratorEEUlvE0_EEDaOT_EUlRKSF_E_NS_8ct_error10assert_allEEEEDaS12_DpOT0_EUlDpOT_E_S13_N7seastar6futureIvEELi0EEEDaNS1F_13lw_shared_ptrIS11_EEOT0_OT1_ at /home/rzarzynski/ceph1/build/../src/crimson/common/interruptible_future.h:253 6# operator()<const crimson::unthrowable_wrapper<const std::error_code&, ((const std::error_code&)(& crimson::ec<2>))>&> at /home/rzarzynski/ceph1/build/../src/crimson/common/interruptible_future.h:951 7# _ZN7crimson9erroratorIJNS_19unthrowable_wrapperIRKSt10error_codeL_ZNS_2ecILi5EEEEEENS1_IS4_L_ZNS5_ILi2EEEEEENS1_IS4_L_ZNS5_ILi75EEEEEEEE7_futureINS_23errorated_future_markerIvEEE24_safe_then_handle_errorsINS0_IJS6_EE8futurizeIN7seastar6futureIvEEEESJ_ZNS_13interruptible27interruptible_future_detailINS_2os8seastore28TransactionConflictConditionESD_E26handle_error_interruptibleILb1EZNS_L8composerINSF_12pass_furtherEJZNS7_6handleIZNSO_8SeaStore20_do_transaction_stepERNSV_18internal_context_tERN5boost13intrusive_ptrINSN_19FuturizedCollectionEEERSt6vectorINSZ_INSO_5OnodeEEESaIS15_EES18_RN4ceph2os11Transaction8iteratorEEUlvE0_EEDaOT_EUlRKS7_E_NS_8ct_error10assert_allEEEEDaS1G_DpOT0_EUlDpOT_E_EEDaOT0_EUlS1G_E_EEDaS1U_OT1_ at /home/rzarzynski/ceph1/build/../src/crimson/common/errorator.h:458 8# operator()<seastar::future<> > at /home/rzarzynski/ceph1/build/../src/crimson/common/errorator.h:718 9# seastar::continuation<seastar::internal::promise_base_with_type<void>, seastar::noncopyable_function<crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<5> > >::_future<crimson::errorated_future_marker<void> > 
(seastar::future<void>&&)>, seastar::future<void>::then_wrapped_nrvo<crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<5> > >::_future<crimson::errorated_future_marker<void> >, seastar::noncopyable_function<crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<5> > >::_future<crimson::errorated_future_marker<void> > (seastar::future<void>&&)> >(seastar::noncopyable_function<crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<5> > >::_future<crimson::errorated_future_marker<void> > (seastar::future<void>&&)>&&)::{lambda(seastar::internal::promise_base_with_type<void>&&, seastar::noncopyable_function<crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<5> > >::_future<crimson::errorated_future_marker<void> > (seastar::future<void>&&)>&, seastar::future_state<seastar::internal::monostate>&&)#1}, void>::run_and_dispose() at /home/rzarzynski/ceph1/build/../src/seastar/include/seastar/util/noncopyable_function.hh:209 10# seastar::reactor::run_tasks(seastar::reactor::task_queue&) at /home/rzarzynski/ceph1/build/../src/seastar/src/core/reactor.cc:2353 11# seastar::reactor::run_some_tasks() at /home/rzarzynski/ceph1/build/../src/seastar/src/core/reactor.cc:2766 12# seastar::reactor::do_run() at /home/rzarzynski/ceph1/build/../src/seastar/src/core/reactor.cc:2934 13# seastar::reactor::run() at /home/rzarzynski/ceph1/build/../src/seastar/src/core/reactor.cc:2817 14# seastar::app_template::run_deprecated(int, char**, std::function<void ()>&&) at /home/rzarzynski/ceph1/build/../src/seastar/src/core/app-template.cc:265 15# seastar::app_template::run(int, char**, std::function<seastar::future<int> ()>&&) at /home/rzarzynski/ceph1/build/../src/seastar/src/core/app-template.cc:156 16# main at /home/rzarzynski/ceph1/build/../src/crimson/osd/main.cc:231 17# __libc_start_main in /lib64/libc.so.6 18# _start in /home/rzarzynski/ceph1/build/bin/crimson-osd
Updated by Radoslaw Zarzynski over 1 year ago
The same test fails on BlueStore too.
Updated by Radoslaw Zarzynski over 1 year ago
- Subject changed from seastore: assertion failure in _do_transaction_step() to crimson: assertion failure in _do_transaction_step()
The problem is that we prepend the clone:
ERROR 2022-09-15 20:46:06,872 [shard 0] none - bluestore(/var/lib/ceph/osd/ceph-1) _txc_add_transaction error (2) No such file or directory not handled on operation 17 (op 0, counting from 0) ERROR 2022-09-15 20:46:06,872 [shard 0] none - bluestore(/var/lib/ceph/osd/ceph-1) ENOENT on clone suggests osd bug WARN 2022-09-15 20:46:06,872 [shard 0] bluestore - _dump_transaction transaction dump: { "ops": [ { "op_num": 0, "op_name": "clone", "collection": "3.d_head", "src_oid": "#3:b585768d:::rbd_data.1057580502b4.0000000000000000:head#", "dst_oid": "#3:b585768d:::rbd_data.1057580502b4.0000000000000000:18#" }, { "op_num": 1, "op_name": "touch", "collection": "3.d_head", "oid": "#3:b585768d:::rbd_data.1057580502b4.0000000000000000:head#" },
while lacking the special handling of create. In the classical OSD we have:
void generate_transaction(
PGTransactionUPtr &pgt,
const coll_t &coll,
vector<pg_log_entry_t> &log_entries,
ObjectStore::Transaction *t,
set<hobject_t> *added,
set<hobject_t> *removed,
const ceph_release_t require_osd_release = ceph_release_t::unknown )
{
// ...
pgt->safe_create_traverse(
[&](pair<const hobject_t, PGTransaction::ObjectOperation> &obj_op) {
// ...
match(
op.init_type,
[&](const PGTransaction::ObjectOperation::Init::None &) {
},
[&](const PGTransaction::ObjectOperation::Init::Create &op) {
if (require_osd_release >= ceph_release_t::octopus) {
t->create(coll, goid);
} else {
t->touch(coll, goid);
}
},
[&](const PGTransaction::ObjectOperation::Init::Clone &op) {
t->clone(
coll,
ghobject_t(
op.source, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
goid);
},
Updated by Radoslaw Zarzynski over 1 year ago
- Status changed from New to In Progress
- Assignee set to Radoslaw Zarzynski
Updated by Radoslaw Zarzynski over 1 year ago
The correct order is:
2022-09-26T09:46:54.509+0000 7f74bde73700 30 _dump_transaction transaction dump: { "ops": [ { "op_num": 0, "op_name": "clone", "collection": "1.0_head", "src_oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:head#", "dst_oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:5#" }, { "op_num": 1, "op_name": "rmattr", "collection": "1.0_head", "oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:5#", "name": "snapset" }, { "op_num": 2, "op_name": "setattrs", "collection": "1.0_head", "oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:5#", "attr_lens": { "_": 270 } }, { "op_num": 3, "op_name": "remove", "collection": "1.0_head", "oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:head#" }, { "op_num": 4, "op_name": "create", "collection": "1.0_head", "oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:head#" }, { "op_num": 5, "op_name": "setattrs", "collection": "1.0_head", "oid": "#1:2f0692fe:::rbd_data.10152970f466.0000000000000000:head#", "attr_lens": { "_": 293, "snapset": 91 } }, { "op_num": 6, "op_name": "omap_setkeys", "collection": "meta", "oid": "#-1:c0371625:::snapmapper:0#", "attr_lens": { "OBJ_0000000000000001.4F0694F7.5.rbd%udata%e10152970f466%e0000000000000000..": 95 } }, { "op_num": 7, "op_name": "omap_setkeys", "collection": "meta", "oid": "#-1:c0371625:::snapmapper:0#", "attr_lens": { "SNA_1_0000000000000005_0000000000000001.4F0694F7.5.rbd%udata%e10152970f466%e0000000000000000..": 91 } }, { "op_num": 8, "op_name": "omap_setkeys", "collection": "1.0_head", "oid": "#1:00000000::::head#", "attr_lens": { "0000000012.00000000000000000010": 230, "0000000012.00000000000000000011": 218, "_info": 1001 } } ] }
crimson-osd without any patches does:
TRACE 2022-09-26 10:13:35,428 [shard 0] bluestore - _dump_transaction transaction dump: { "ops": [ { "op_num": 0, "op_name": "clone", "collection": "1.6_head", "src_oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#", "dst_oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:4#" }, { "op_num": 1, "op_name": "touch", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#" }, { "op_num": 2, "op_name": "op_setallochint", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#", "expected_object_size": "4194304", "expected_write_size": "4194304", "alloc_hint_flags": "-" }, { "op_num": 3, "op_name": "write", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#", "length": 80, "offset": 0, "bufferlist length": 80 }, { "op_num": 4, "op_name": "setattr", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:4#", "name": "_", "length": 232 }, { "op_num": 5, "op_name": "rmattr", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:4#", "name": "snapset" }, { "op_num": 6, "op_name": "omap_setkeys", "collection": "1.6_head", "oid": "#1:60000000::::head#", "attr_lens": { "0000000009.00000000000000000001": 218, "0000000009.00000000000000000002": 230, "_epoch": 4, "_info": 1001 } }, { "op_num": 7, "op_name": "setattr", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#", "name": "_", "length": 232 }, { "op_num": 8, "op_name": "setattr", "collection": "1.6_head", "oid": "#1:658c2564:::rbd_data.10153a6c1f4a.0000000000000000:head#", "name": "snapset", "length": 107 } ] }
Updated by Radoslaw Zarzynski over 1 year ago
- Status changed from In Progress to Fix Under Review
- Pull request ID set to 48373
Updated by Matan Breizman 12 months ago
- Status changed from Fix Under Review to Resolved
Actions