Project

General

Profile

Bug #57629

Updated by Samuel Just over 1 year ago

Release build: ./do_cmake.sh -DWITH_SEASTAR=ON -DWITH_MGR_DASHBOARD_FRONTEND=OFF -DWITH_CCACHE=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_TESTS=OFF

vstart: MGR=1 MON=1 OSD=1 MDS=0 RGW=0 ../src/vstart.sh -n -x --without-dashboard --crimson --nodaemon --redirect-output -d


<pre>
/home/sam/git-checkouts/ceph2/build/bin/crimson-osd -i 0 -c /home/sam/git-checkouts/ceph2/build/ceph.conf --mkfs --key AQDipStj9WAlNhAA9y0F8t9XWwuwsFxiZs7WSQ== --osd-uuid 7048b403-07c2-474e-afc1-380394a4ca6e --smp 1 --cpuset 0 --debug
WARN 2022-09-22 00:01:39,378 [shard 0] seastar - Creation of perf_event based stall detector failed, falling back to posix timer: std::system_error (error system:13, perf_event_open() failed: Permission denied)
INFO 2022-09-22 00:01:39,379 [shard 0] seastar - Created fair group io-queue-0, capacity rate 2147483:2147483, limit 12582912, rate 16777216 (factor 1), threshold 2000
INFO 2022-09-22 00:01:39,379 [shard 0] seastar - Created io group dev(0), length limit 4194304:4194304, rate 2147483647:2147483647
INFO 2022-09-22 00:01:39,379 [shard 0] seastar - Created io queue dev(0) capacities: 512:2000/2000 1024:3000/3000 2048:5000/5000 4096:9000/9000 8192:17000/17000 16384:33000/33000 32768:65000/65000 65536:129000/129000 131072:257000/257000
Segmentation fault on shard 0.
Backtrace:
0# realloc at /home/sam/git-checkouts/ceph2/src/seastar/src/core/memory.cc:544
1# OPENSSL_LH_insert in /lib64/libcrypto.so.3
2# 0x00007F06EE0960E3 in /lib64/libcrypto.so.3
3# 0x00007F06EDFBD2E4 in /lib64/libcrypto.so.3
4# 0x00007F06ED1B7029 in /lib64/libc.so.6
5# CRYPTO_THREAD_run_once in /lib64/libcrypto.so.3
6# OPENSSL_init_crypto in /lib64/libcrypto.so.3
7# 0x00007F06EDF74C23 in /lib64/libcrypto.so.3
8# ERR_set_mark in /lib64/libcrypto.so.3
9# CONF_modules_load_file_ex in /lib64/libcrypto.so.3
10# 0x00007F06EDFBE5A0 in /lib64/libcrypto.so.3
11# 0x00007F06ED1B7029 in /lib64/libc.so.6
12# CRYPTO_THREAD_run_once in /lib64/libcrypto.so.3
13# OPENSSL_init_crypto in /lib64/libcrypto.so.3
14# 0x00007F06EE08FA7E in /lib64/libcrypto.so.3
15# 0x00007F06EDF953B1 in /lib64/libcrypto.so.3
16# EVP_CipherInit_ex in /lib64/libcrypto.so.3
17# ceph::crypto::onwire::rxtx_t::create_handler_pair(crimson::common::CephContext*, AuthConnectionMeta const&, bool, bool) at /home/sam/git-checkouts/ceph2/src/msg/async/crypto_onwire.cc:180
18# crimson::net::ProtocolV2::handle_auth_reply()::{lambda(ceph::msgr::v2::Tag)#1}::operator()(ceph::msgr::v2::Tag) const::{lambda()#4}::operator()() const at /home/sam/git-checkouts/ceph2/src/crimson/net/ProtocolV2.cc:560
19# crimson::net::ProtocolV2::handle_auth_reply()::{lambda(ceph::msgr::v2::Tag)#1}::operator()(ceph::msgr::v2::Tag) const at /home/sam/git-checkouts/ceph2/src/seastar/include/seastar/core/future.hh:2141
20# seastar::future_state_base::any::available() const at /home/sam/git-checkouts/ceph2/src/seastar/include/seastar/core/future.hh:459
21# seastar::reactor::run_tasks(seastar::reactor::task_queue&) at /home/sam/git-checkouts/ceph2/src/seastar/src/core/reactor.cc:2353
22# seastar::reactor::run_some_tasks() at /home/sam/git-checkouts/ceph2/src/seastar/src/core/reactor.cc:2766
23# seastar::reactor::do_run() at /home/sam/git-checkouts/ceph2/src/seastar/src/core/reactor.cc:2935
24# seastar::reactor::run() at /home/sam/git-checkouts/ceph2/src/seastar/src/core/reactor.cc:2823
25# seastar::app_template::run_deprecated(int, char**, std::function<void ()>&&) at /home/sam/git-checkouts/ceph2/src/seastar/src/core/app-template.cc:266
26# seastar::app_template::run(int, char**, std::function<seastar::future<int> ()>&&) at /home/sam/git-checkouts/ceph2/src/seastar/src/core/app-template.cc:156
27# main at /usr/include/c++/12/bits/std_function.h:334
28# 0x00007F06ED14E590 in /lib64/libc.so.6
29# __libc_start_main in /lib64/libc.so.6
30# _start in /home/sam/git-checkouts/ceph2/build/bin/crimson-osd
</pre>

Reproduced on a branch based on commit 73ce96de86bd59e2d926ac238126d74d06364f14 from main. The branch's local diff against that commit follows (it should be irrelevant to the crash, but is included for completeness):

<pre>
diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc
index fca707f9851..8d6c587c751 100644
--- a/src/crimson/osd/osd_operations/client_request.cc
+++ b/src/crimson/osd/osd_operations/client_request.cc
@@ -202,13 +202,14 @@ ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg)
[this, pg]() mutable {
return do_recover_missing(pg, m->get_hobj());
}).then_interruptible([this, pg, &ihref]() mutable {
- return pg->already_complete(m->get_reqid()).then_unpack_interruptible(
- [this, pg, &ihref](bool completed, int ret) mutable
+ return pg->already_complete(m->get_reqid()).then_interruptible(
+ [this, pg, &ihref](auto completed) mutable
-> PG::load_obc_iertr::future<seq_mode_t> {
if (completed) {
auto reply = crimson::make_message<MOSDOpReply>(
- m.get(), ret, pg->get_osdmap_epoch(),
+ m.get(), completed->err, pg->get_osdmap_epoch(),
CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false);
+ reply->set_reply_versions(completed->version, completed->user_version);
return conn->send(std::move(reply)).then([] {
return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
});
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc
index 15b61225752..18e597e9938 100644
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -1445,7 +1445,7 @@ bool PG::is_degraded_or_backfilling_object(const hobject_t& soid) const {
return false;
}

-PG::interruptible_future<std::tuple<bool, int>>
+PG::interruptible_future<std::optional<PG::complete_op_t>>
PG::already_complete(const osd_reqid_t& reqid)
{
eversion_t version;
@@ -1455,11 +1455,15 @@ PG::already_complete(const osd_reqid_t& reqid)

if (peering_state.get_pg_log().get_log().get_request(
reqid, &version, &user_version, &ret, &op_returns)) {
- return backend->request_committed(reqid, version).then([ret] {
- return seastar::make_ready_future<std::tuple<bool, int>>(true, ret);
+ complete_op_t dupinfo{
+ user_version,
+ version,
+ ret};
+ return backend->request_committed(reqid, version).then([dupinfo] {
+ return seastar::make_ready_future<std::optional<complete_op_t>>(dupinfo);
});
} else {
- return seastar::make_ready_future<std::tuple<bool, int>>(false, 0);
+ return seastar::make_ready_future<std::optional<complete_op_t>>(std::nullopt);
}
}

diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index 7a5d6f7075e..1fb8d4dbbc9 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -711,7 +711,14 @@ public:
return &it->second;
}
}
- interruptible_future<std::tuple<bool, int>> already_complete(const osd_reqid_t& reqid);
+
+ struct complete_op_t {
+ const version_t user_version;
+ const eversion_t version;
+ const int err;
+ };
+ interruptible_future<std::optional<complete_op_t>>
+ already_complete(const osd_reqid_t& reqid);
int get_recovery_op_priority() const {
int64_t pri = 0;
get_pgpool().info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
</pre>

Back