Bug #43135
rgw: core dump in RadosWriter
Status:
Duplicate
Priority:
Urgent
Assignee:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
ceph version 14.2.1-700.3.0.2.412 (1a3df1588bf87c0817b6d815c208ce6a5efc5b5c) nautilus (stable) 1: (()+0xf5e0) [0x7fe4d86475e0] 2: (()+0x595830) [0x7fe4e59a5830] 3: (()+0x595bfd) [0x7fe4e59a5bfd] 4: (rgw::putobj::RadosWriter::process(ceph::buffer::v14_2_0::list&&, unsigned long)+0xf7) [0x7fe4e59a5e77] 5: (rgw::putobj::ChunkProcessor::process(ceph::buffer::v14_2_0::list&&, unsigned long)+0x1a2) [0x7fe4e59a4fb2] 6: (rgw::putobj::StripeProcessor::process(ceph::buffer::v14_2_0::list&&, unsigned long)+0x15f) [0x7fe4e59a51ff] 7: (RGWPutObj::execute()+0xd35) [0x7fe4e5981c85] 8: (rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, bool)+0x6f1) [0x7fe4e572e9e1] 9: (process_request(RGWRados*, RGWREST*, RGWRequest*, std::string const&, rgw::auth::StrategyRegistry const&, RGWRestfulIO*, OpsLogSocket*, optional_yield, rgw::dmclock::Scheduler*, int*)+0x1b3c) [0x7fe4e573110c] 10: (()+0x285bad) [0x7fe4e5695bad] 11: (()+0x28698b) [0x7fe4e569698b] 12: (make_fcontext()+0x2f) [0x7fe4e5b5b47f] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. 
#16 rgw::putobj::process_completed (completed=..., written=written@entry=0x7fe4ea5f1560) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj_processor.cc:67 67 written->insert(r.obj.get_ref().obj); (gdb) p r.obj $14 = {rados_svc = 0x7fe40000000a, rados_handle = -441011200, ref = {obj = {pool = {name = "", ns = ""}, oid = <error reading variable: Cannot access memory at address 0xffffffffffffffe8>, loc = ""}, ioctx = {io_ctx_impl = 0x7fe4ea5f1400}}} #16 rgw::putobj::process_completed (completed=..., written=written@entry=0x7fe4ea5f1560) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj_processor.cc:67 67 written->insert(r.obj.get_ref().obj); (gdb) p r.obj $8 = {rados_svc = 0x7fe40000000a, rados_handle = -441011200, ref = {obj = {pool = {name = "", ns = ""}, oid = <error reading variable: Cannot access memory at address 0xffffffffffffffe8>, loc = ""}, ioctx = {io_ctx_impl = 0x7fe4ea5f1400}}} (gdb) p *this $13 = {<rgw::putobj::DataProcessor> = {_vptr.DataProcessor = 0x7fe4e5ed6378 <vtable for rgw::putobj::RadosWriter+16>}, aio = 0x7fe4ea5f12a0, store = 0x7fe4e821c800, bucket_info = @0x7fe4ea5f2310, obj_ctx = @0x7fe4ea5f20c0, head_obj = {bucket = {tenant = "", name = "rgw_tps-b1-16mb", marker = "0a753cc2-0999-4a33-bc45-3ad99d543ee8.6342.1", bucket_id = "0a753cc2-0999-4a33-bc45-3ad99d543ee8.6342.1", explicit_placement = { data_pool = {name = "", ns = ""}, data_extra_pool = {name = "", ns = ""}, index_pool = {name = "", ns = ""}}, oid = ".bucket.meta.rgw_tps-b1-16mb:0a753cc2-0999-4a33-bc45-3ad99d543ee8.6342.1"}, key = {name = "file-16MB-r8-589", instance = "", ns = ""}, in_extra_data = false, index_hash_source = ""}, stripe_obj = {rados_svc = 0x7fe4e7580e40, rados_handle = -1, ref = {obj = {pool = { name = "csp_default_hdd_pool", ns = ""}, oid = "0a753cc2-0999-4a33-bc45-3ad99d543ee8.6342.1__shadow_.RfE8V4jqu4FvavfCLSK5uJTcSZdm-f6_2", loc = ""}, ioctx = {Python Exception <class 'gdb.error'> There is no member or method named _M_value_field.: 
io_ctx_impl = 0x7fe4eab806e0}}}, written = std::set with 1 elements} (gdb) bt #0 0x00007fe4d86474ab in raise () from /lib64/libpthread.so.0 #1 0x00007fe4e570f6f9 in reraise_fatal (signum=11) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/global/signal_handler.cc:81 #2 handle_fatal_signal (signum=11) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/global/signal_handler.cc:326 #3 <signal handler called> #4 _M_is_leaked (this=0xffffffffffffffe8) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/basic_string.h:3220 #5 std::string::_Rep::_M_grab (this=0xffffffffffffffe8, __alloc2=..., __alloc1=...) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/basic_string.h:3222 #6 0x00007fe4e59a5bfd in basic_string (__str=<error reading variable: Cannot access memory at address 0xffffffffffffffe8>, this=0x7fe4f1768b70) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/basic_string.tcc:613 #7 rgw_raw_obj (this=0x7fe4f1768b60) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_common.h:1051 #8 construct<rgw_raw_obj, rgw_raw_obj const&> (this=<optimized out>, __p=<optimized out>) at /opt/rh/devtoolset-7/root/usr/include/c++/7/ext/new_allocator.h:136 #9 construct<rgw_raw_obj, rgw_raw_obj const&> (__a=..., __p=<optimized out>) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/alloc_traits.h:475 #10 _M_construct_node<rgw_raw_obj const&> (this=<optimized out>, __node=0x7fe4f1768b40) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_tree.h:626 #11 _M_create_node<rgw_raw_obj const&> (this=<optimized out>) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_tree.h:643 #12 operator()<const rgw_raw_obj&> (this=<synthetic pointer>, __arg=...) 
at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_tree.h:556 #13 _M_insert_<rgw_raw_obj const&, std::_Rb_tree<rgw_raw_obj, rgw_raw_obj, std::_Identity<rgw_raw_obj>, std::less<rgw_raw_obj>, std::allocator<rgw_raw_obj> >::_Alloc_node> (__node_gen=<synthetic pointer>, __v=..., __p=0x7fe4f1768b00, __x=0x0, this=0x7fe4ea5f1560) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_tree.h:1753 #14 _M_insert_unique<rgw_raw_obj const&> (__v=..., this=0x7fe4ea5f1560) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_tree.h:2096 #15 insert (__x=..., this=0x7fe4ea5f1560) at /opt/rh/devtoolset-7/root/usr/include/c++/7/bits/stl_set.h:501 #16 rgw::putobj::process_completed (completed=..., written=written@entry=0x7fe4ea5f1560) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj_processor.cc:67 #17 0x00007fe4e59a5e77 in rgw::putobj::RadosWriter::process(ceph::buffer::v14_2_0::list&&, unsigned long) (this=0x7fe4ea5f1480, bl=<optimized out>, offset=<optimized out>) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj_processor.cc:96 #18 0x00007fe4e59a4fb2 in process (offset=4194304, data=<optimized out>, this=0x7fe4ea5f1a40) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj.h:41 #19 rgw::putobj::ChunkProcessor::process(ceph::buffer::v14_2_0::list&&, unsigned long) (this=0x7fe4ea5f1a40, data=<optimized out>, offset=<optimized out>) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj.cc:42 #20 0x00007fe4e59a51ff in process (offset=<optimized out>, data=<optimized out>, this=0x7fe4ea5f1aa0) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj.h:41 #21 rgw::putobj::StripeProcessor::process(ceph::buffer::v14_2_0::list&&, unsigned long) (this=0x7fe4ea5f1aa0, data=<unknown type in /usr/lib/debug/usr/bin/radosgw.debug, CU 0x72dd651, DIE 0x72f3f18>, offset=<optimized out>) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_putobj.cc:96 #22 0x00007fe4e5981c85 in RGWPutObj::execute (this=0x7fe4e9a78000) at 
/usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_op.cc:3768 #23 0x00007fe4e572e9e1 in rgw_process_authenticated (handler=handler@entry=0x7fe4e8a7b4e0, op=@0x7fe4ea5f1d90: 0x7fe4e9a78000, req=req@entry=0x7fe4ea5f28e0, s=s@entry=0x7fe4ea5f2150, skip_retarget=skip_retarget@entry=false) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_process.cc:161 #24 0x00007fe4e573110c in process_request (store=0x7fe4e821c800, rest=0x7ffc15215600, req=req@entry=0x7fe4ea5f28e0, frontend_prefix=..., auth_registry=..., client_io=client_io@entry=0x7fe4ea5f2900, olog=0x0, yield=..., scheduler=0x7fe4e852a6c8, http_ret=0x0) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_process.cc:277 #25 0x00007fe4e5695bad in (anonymous namespace)::handle_connection<boost::asio::basic_stream_socket<boost::asio::ip::tcp> > (env=..., stream=..., buffer=..., pause_mutex=..., scheduler=<optimized out>, ec=..., yield=..., is_ssl=false) ---Type <return> to continue, or q <return> to quit--- at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_asio_frontend.cc:156 #26 0x00007fe4e569698b in operator() (yield=<error reading variable: access outside bounds of object referenced via synthetic pointer>, __closure=0x7fe4f0d5b478) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/src/rgw/rgw_asio_frontend.cc:577 #27 operator() (ca=..., this=<optimized out>) at /usr/src/debug/ceph-14.2.1-700.3.0.2.412/build/boost/include/boost/asio/impl/spawn.hpp:382 #28 boost::coroutines::detail::push_coroutine_object<boost::coroutines::pull_coroutine<void>, void, boost::asio::detail::coro_entry_point<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(boost::asio::yield_context)> >&, boost::coroutines::basic_standard_stack_allocator<boost::coroutines::stack_traits> >::run(void) (this=0x7fe4ea5f3f60) at 
/usr/src/debug/ceph-14.2.1-700.3.0.2.412/build/boost/include/boost/coroutine/detail/push_coroutine_object.hpp:293 #29 0x00007fe4e5b5b47f in make_fcontext () #30 0x00007fe4e5eb8330 in vtable for boost::coroutines::detail::push_coroutine_object<boost::coroutines::pull_coroutine<void>, void, boost::asio::detail::coro_entry_point<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::executor_type> >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::{lambda(boost::asio::basic_yield_context<boost::asio::executor_binder<void (*)(), boost::asio::executor> >)#3}>&, boost::coroutines::basic_standard_stack_allocator<boost::coroutines::stack_traits> > () #31 0x0000000000000026 in ?? () #32 0x0000000000000000 in ?? ()
Related issues
History
#1 Updated by Chang Liu over 4 years ago
I tested with the patch from https://tracker.ceph.com/issues/42456. RadosWriter still segfaults with that patch applied.
#2 Updated by Casey Bodley over 4 years ago
- Status changed from New to Need More Info
If this is only happening on nautilus, I'd expect https://tracker.ceph.com/issues/39660 to be the culprit. Can you please test with that fix as well?
#3 Updated by Casey Bodley over 4 years ago
- Duplicates Backport #39660: nautilus: rgw: Segfault during request processing added
#4 Updated by Casey Bodley over 4 years ago
- Status changed from Need More Info to Duplicate