Actions
Bug #44068
closed: rgw crash when 2 zones syncing a pubsub zone
Status:
Resolved
Priority:
Normal
Assignee:
-
Target version:
-
% Done:
0%
Source:
Development
Tags:
multisite
Backport:
nautilus
Regression:
No
Severity:
3 - minor
Reviewed:
Description
- does not happen when there is a single non-pubsub zone
- both non-pubsub RGWs crashed at the same time and at the same line of code (rgw_data_sync.cc:161)
- crash consistently reproduced when running the rgw multisite test suite locally
int RGWReadDataSyncStatusCoroutine::operate()
{
reenter(this) {
// read sync info
using ReadInfoCR = RGWSimpleRadosReadCR<rgw_data_sync_info>;
yield {
bool empty_on_enoent = false; // fail on ENOENT
call(new ReadInfoCR(sync_env->async_rados, sync_env->svc->sysobj,
rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sc->source_zone)),
&sync_status->sync_info, empty_on_enoent));
}
...
sync_env has null pointers in it:
(gdb) p *sync_env
$3 = {dpp = 0x0, cct = 0x0, store = 0x0, svc = 0x0, async_rados = 0x0, http_manager = 0x7f852a101b80, error_logger = 0x0, sync_tracer = 0x0, sync_module = std::shared_ptr<RGWSyncModuleInstance> (empty) = {get() = 0x0}, counters = 0x0}
full backtrace:
#0 0x00007f865ab27b05 in raise () from /lib64/libpthread.so.0 #1 0x00007f86674ab8be in reraise_fatal (signum=11) at /root/projects/ceph/src/global/signal_handler.cc:81 #2 0x00007f86674ac983 in handle_fatal_signal (signum=11) at /root/projects/ceph/src/global/signal_handler.cc:326 #3 <signal handler called> #4 0x00007f8666f86e3f in RGWReadDataSyncStatusCoroutine::operate (this=0x55b496cb8a00) at /root/projects/ceph/src/rgw/rgw_data_sync.cc:161 #5 0x00007f8666c37e87 in RGWCoroutine::operate_wrapper (this=0x55b496cb8a00) at /root/projects/ceph/src/rgw/rgw_coroutine.h:265 #6 0x00007f86670c39cc in RGWCoroutinesStack::operate (this=0x55b496ddedc0, _env=0x7f852a1013d0) at /root/projects/ceph/src/rgw/rgw_coroutine.cc:228 #7 0x00007f86670c5ca7 in RGWCoroutinesManager::run (this=0x7f852a101a70, stacks=std::__cxx11::list = {...}) at /root/projects/ceph/src/rgw/rgw_coroutine.cc:623 #8 0x00007f86670c6f59 in RGWCoroutinesManager::run (this=0x7f852a101a70, op=0x55b496cb8a00) at /root/projects/ceph/src/rgw/rgw_coroutine.cc:762 #9 0x00007f8666f888f3 in RGWRemoteDataLog::read_sync_status (this=0x55b492af32b0, sync_status=0x55b4944dc4c0) at /root/projects/ceph/src/rgw/rgw_data_sync.cc:680 #10 0x00007f8666c79bab in RGWDataSyncStatusManager::read_sync_status (this=0x55b492af3258, sync_status=0x55b4944dc4c0) at /root/projects/ceph/src/rgw/rgw_data_sync.h:410 #11 0x00007f8666c74944 in RGWOp_DATALog_Status::execute (this=0x55b4944dc400) at /root/projects/ceph/src/rgw/rgw_rest_log.cc:1008 #12 0x00007f8666c4f19e in rgw_process_authenticated (handler=0x55b493994f40, op=@0x7f852a102050: 0x55b4944dc400, req=0x7f852a1037a0, s=0x7f852a1026b0, skip_retarget=false) at /root/projects/ceph/src/rgw/rgw_process.cc:161 #13 0x00007f8666c51013 in process_request (store=0x55b493680ac0, rest=0x7ffe2da70cb0, req=0x7f852a1037a0, frontend_prefix="", auth_registry=..., client_io=0x7f852a1037f0, olog=0x0, yield=..., scheduler=0x55b4938ec308, http_ret=0x0) at /root/projects/ceph/src/rgw/rgw_process.cc:278 #14 
0x00007f8666b390f1 in (anonymous namespace)::handle_connection<boost::asio::basic_stream_socket<boost::asio::ip::tcp> > (context=..., env=..., stream=..., buffer=..., is_ssl=false, pause_mutex=..., scheduler=0x55b4938ec308, ec=..., yield=...) at /root/projects/ceph/src/rgw/rgw_asio_frontend.cc:173 #15 0x00007f8666b34235 in (anonymous namespace)::AsioFrontend::<lambda(spawn::yield_context)>::operator()(spawn::yield_context) (__closure=0x55b4967fd5b8, yield=...) at /root/projects/ceph/src/rgw/rgw_asio_frontend.cc:634 #16 0x00007f8666b40c4f in spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::<lambda(boost::context::continuation&&)>::operator()(boost::context::continuation &&) const (this=0x7f85edd0cf40, c=...) 
at /root/projects/ceph/src/spawn/include/spawn/impl/spawn.hpp:311 #17 0x00007f8666b43bdc in std::__invoke_impl<boost::context::continuation, spawn::detail::spawn_helper<Handler, Function, StackAllocator>::operator()() [with Handler = boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >; Function = (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>; StackAllocator = boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>]::<lambda(boost::context::continuation&&)>&, boost::context::continuation>(std::__invoke_other, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::<lambda(boost::context::continuation&&)> &, boost::context::continuation &&) (__f=..., __args#0=...) 
at /usr/include/c++/9/bits/invoke.h:60 #18 0x00007f8666b43a8d in std::__invoke<spawn::detail::spawn_helper<Handler, Function, StackAllocator>::operator()() [with Handler = boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >; Function = (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>; StackAllocator = boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>]::<lambda(boost::context::continuation&&)>&, boost::context::continuation>(spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::<lambda(boost::context::continuation&&)> &, boost::context::continuation &&) (__fn=..., __args#0=...) 
at /usr/include/c++/9/bits/invoke.h:96 #19 0x00007f8666b43901 in std::invoke<spawn::detail::spawn_helper<Handler, Function, StackAllocator>::operator()() [with Handler = boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >; Function = (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>; StackAllocator = boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>]::<lambda(boost::context::continuation&&)>&, boost::context::continuation>(spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::<lambda(boost::context::continuation&&)> &, boost::context::continuation &&) (__fn=..., __args#0=...) 
at /usr/include/c++/9/functional:82 #20 0x00007f8666b436d3 in boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<Handler, Function, StackAllocator>::operator()() [with Handler = boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >; Function = (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>; StackAllocator = boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>]::<lambda(boost::context::continuation&&)> >::run(boost::context::detail::fcontext_t) (this=0x7f852a104f00, fctx=0x7f85edd0ccf0) at /root/projects/ceph/build/boost/include/boost/context/continuation_fcontext.hpp:146 #21 0x00007f8666b42fa4 in boost::context::detail::context_entry<boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<Handler, Function, StackAllocator>::operator()() [with Handler = boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >; Function = (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(spawn::yield_context)>; StackAllocator = boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>]::<lambda(boost::context::continuation&&)> > >(boost::context::detail::transfer_t) (t=...) at /root/projects/ceph/build/boost/include/boost/context/continuation_fcontext.hpp:80 #22 0x00007f86674b0d5f in make_fcontext () from /root/projects/ceph/build/lib/libradosgw.so.2 #23 0x0000000000000000 in ?? ()
Updated by Casey Bodley about 4 years ago
- Status changed from New to Fix Under Review
- Tags set to multisite
- Pull request ID set to 33193
Updated by Casey Bodley about 4 years ago
- Status changed from Fix Under Review to Pending Backport
- Backport set to nautilus
Updated by Nathan Cutler about 4 years ago
- Copied to Backport #44265: nautilus: rgw crash when 2 zones syncing a pubsub zone added
Updated by Backport Bot over 1 year ago
- Tags changed from multisite to multisite backport_processed
Updated by Konstantin Shalygin over 1 year ago
- Status changed from Pending Backport to Rejected
Updated by Konstantin Shalygin over 1 year ago
- Status changed from Rejected to Resolved
- Tags changed from multisite backport_processed to multisite
Actions