Bug #58748
openrgw crashed in RGWObjManifest::encode during bucket notification test
0%
Description
The crash happens during PUT object, when RGW is running under vstart with bucket notifications.
load was done via hsbench (more details are here: https://gist.github.com/yuvalif/3419238fe8debc80c5fdadc30d9b2340)
backtrace does not seem related to the notifications code path:
#0 0x00007f87017041f3 in std::_Rb_tree_increment(std::_Rb_tree_node_base*) () from /lib64/libstdc++.so.6 #1 0x0000563c71d8accd in std::_Rb_tree_const_iterator<std::pair<unsigned long const, RGWObjManifestRule> >::operator++ (this=<synthetic pointer>) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_tree.h:366 #2 ceph::encode<unsigned long, RGWObjManifestRule, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, RGWObjManifestRule> >, denc_traits<unsigned long, void>, denc_traits<RGWObjManifestRule, void> > ( m=std::map with 1 element = {...}, bl=...) at ../src/include/encoding.h:1036 #3 0x0000563c71d9d74d in RGWObjManifest::encode (this=0x563c87da3c70, bl=...) at ../src/rgw/driver/rados/rgw_obj_manifest.h:270 #4 0x0000563c71d6df93 in encode (features=0, bl=..., c=...) at ../src/rgw/driver/rados/rgw_obj_manifest.h:618 #5 RGWRados::Object::Write::_do_write_meta (this=this@entry=0x7f85c563cc00, dpp=<optimized out>, dpp@entry=0x563c82119700, size=size@entry=1024, accounted_size=accounted_size@entry=1024, attrs=std::map with 4 elements = {...}, assume_noent=assume_noent@entry=true, modify_tail=true, _index_op=0x7f85c563c270, y=...) at ../src/rgw/driver/rados/rgw_rados.cc:3117 #6 0x0000563c71d6f36c in RGWRados::Object::Write::write_meta (this=this@entry=0x7f85c563cc00, dpp=0x563c82119700, size=size@entry=1024, accounted_size=accounted_size@entry=1024, attrs=std::map with 4 elements = {...}, y=...) at ../src/rgw/driver/rados/rgw_rados.cc:3316 #7 0x0000563c7210342c in rgw::putobj::AtomicObjectProcessor::complete (this=this@entry=0x563c87da3a28, accounted_size=1024, etag="070f903e3b6ad4f35a82f4827587c74f", mtime=mtime@entry=0x563c8211a018, set_mtime=..., attrs=std::map with 4 elements = {...}, delete_at=..., if_match=0x0, if_nomatch=0x0, user_data=0x0, zones_trace=0x0, pcanceled=0x0, y=...) 
at ../src/rgw/driver/rados/rgw_putobj_processor.cc:337 #8 0x0000563c71dd2763 in rgw::sal::RadosAtomicWriter::complete (this=this@entry=0x563c87da3a00, accounted_size=<optimized out>, etag="070f903e3b6ad4f35a82f4827587c74f", mtime=mtime@entry=0x563c8211a018, set_mtime=..., set_mtime@entry=..., attrs=std::map with 4 elements = {...}, delete_at=..., if_match=0x0, if_nomatch=0x0, user_data=0x0, zones_trace=0x0, canceled=0x0, y=...) at ../src/rgw/driver/rados/rgw_sal_rados.cc:2965 #9 0x0000563c71b1fb2c in RGWPutObj::execute (this=0x563c82119700, y=...) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/chrono:882 #10 0x0000563c718f5c14 in rgw_process_authenticated (handler=handler@entry=0x563c7d9acd80, op=@0x7f85c563da48: 0x563c82119700, req=req@entry=0x7f85c563e750, s=0x7f85c563dbc0, y=..., driver=0x563c74f7f540, skip_retarget=false) at ../src/rgw/rgw_process.cc:255 #11 0x0000563c718f9da2 in process_request (penv=..., req=req@entry=0x7f85c563e750, frontend_prefix="", client_io=client_io@entry=0x7f85c563e800, yield=..., scheduler=0x563c79ecad88, user=0x7f85c563e960, latency=0x7f85c563e728, http_ret=0x7f85c563e724) at ../src/rgw/rgw_process.cc:392 #12 0x0000563c7181625f in (anonymous namespace)::handle_connection<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > > (context=..., env=..., stream=..., timeout=..., header_limit=16384, buffer=..., is_ssl=false, pause_mutex=..., scheduler=0x563c79ecad88, uri_prefix="", ec=..., yield=...) at ../src/rgw/rgw_asio_frontend.cc:275 #13 0x0000563c71816d34 in operator() (__closure=__closure@entry=0x563c7e997238, yield=...) at ../src/rgw/rgw_asio_frontend.cc:1037 #14 0x0000563c71816eed in operator() (__closure=__closure@entry=0x7f85c563ff18, c=...) 
at ../src/spawn/include/spawn/impl/spawn.hpp:390 #15 0x0000563c718170ea in std::__invoke_impl<boost::context::continuation, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()( )::<lambda(boost::context::continuation&&)>&, boost::context::continuation> (__f=...) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/invoke.h:61 #16 std::__invoke<spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous n --Type <RET> for more, q to quit, c to continue without paging-- amespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)>&, boost::cont ext::continuation> (__fn=...) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/invoke.h:97 #17 std::invoke<spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous nam espace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)>&, boost::contex t::continuation> (__fn=...) 
at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/functional:98 #18 boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost ::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_p rotected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)> >::run (fctx=<optimized out>, this=0x7f85c563ff00) at boost/include/boost/context/continuation_fcontext.hpp:143 #19 boost::context::detail::context_entry<boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<boost::asio::executor_binder <void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yi eld_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)> > >(boost::context::detail::transfer_t) (t=...) at boost/include/boost/context/continuation_fcontext.hpp:80 #20 0x0000563c728eb01f in make_fcontext () #21 0x0000000000000000 in ?? () pre>
Updated by Yuval Lifshitz about 1 year ago
Another crash seen in the same type of test:
#0 std::_Rb_tree<unsigned long, std::pair<unsigned long const, RGWObjManifestRule>, std::_Select1st<std::pair<unsigned long const, RGWObjManifestRule> >, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, RGWObjManif estRule> > >::_M_upper_bound (this=this@entry=0x5642b50d7938, __x=0xffffffff00000000, __y=__y@entry=0x5642b50d7940, __k=@0x7f416ae2c530: 1024) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_function.h:399 #1 0x00005642a5fc9e0f in std::_Rb_tree<unsigned long, std::pair<unsigned long const, RGWObjManifestRule>, std::_Select1st<std::pair<unsigned long const, RGWObjManifestRule> >, std::less<unsigned long>, std::allocator<std::pair<unsigned l ong const, RGWObjManifestRule> > >::upper_bound (__k=@0x7f416ae2c530: 1024, this=0x5642b50d7938) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_tree.h:1282 #2 std::map<unsigned long, RGWObjManifestRule, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, RGWObjManifestRule> > >::upper_bound (__x=@0x7f416ae2c530: 1024, this=0x5642b50d7938) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_map.h:1324 #3 RGWObjManifest::obj_iterator::seek (this=this@entry=0x7f416ae2c510, o=1024) at ../src/rgw/rgw_obj_manifest.cc:130 #4 0x00005642a5d6e9a8 in RGWObjManifest::obj_iterator::obj_iterator (_ofs=<optimized out>, _m=0x5642b50d7540, _dpp=0x5642b4a02000, this=0x7f416ae2c510) at ../src/rgw/driver/rados/rgw_obj_manifest.h:515 #5 RGWObjManifest::obj_end (dpp=0x5642b4a02000, this=0x5642b50d7540) at ../src/rgw/driver/rados/rgw_obj_manifest.h:575 #6 RGWRados::update_gc_chain (this=this@entry=0x5642ace7eb00, dpp=0x5642b4a02000, head_obj=..., manifest=..., chain=chain@entry=0x7f416ae2c870) at ../src/rgw/driver/rados/rgw_rados.cc:4923 #7 0x00005642a5dadd53 in RGWRados::Object::complete_atomic_modification (this=0x5642b1c173b0, dpp=<optimized out>) at ../src/rgw/driver/rados/rgw_rados.cc:4897 #8 0x00005642a5db0ea0 in RGWRados::Object::Delete::delete_obj 
(this=this@entry=0x5642b1c17618, y=..., dpp=<optimized out>, dpp@entry=0x5642b4a02000) at ../src/rgw/driver/rados/rgw_rados.cc:5298 #9 0x00005642a5e18d8a in rgw::sal::RadosObject::RadosDeleteOp::delete_obj (this=0x5642b1c17200, dpp=0x5642b4a02000, y=...) at ../src/rgw/driver/rados/rgw_sal_rados.cc:2168 #10 0x00005642a5b5312e in RGWDeleteObj::execute (this=0x5642b4a02000, y=...) at ../src/rgw/rgw_op.cc:5120 #11 0x00005642a5936016 in rgw_process_authenticated (handler=handler@entry=0x5642b391c6a0, op=@0x7f416ae2da28: 0x5642b4a02000, req=req@entry=0x7f416ae2e730, s=0x7f416ae2dba0, y=..., driver=0x5642a8c95580, skip_retarget=false) at ../src/rgw/rgw_process.cc:255 #12 0x00005642a593a1a4 in process_request (penv=..., req=req@entry=0x7f416ae2e730, frontend_prefix="", client_io=client_io@entry=0x7f416ae2e7e0, yield=..., scheduler=0x5642adb98f08, user=0x7f416ae2e940, latency=0x7f416ae2e708, http_ret=0x7f416ae2e704) at ../src/rgw/rgw_process.cc:392 #13 0x00005642a5856261 in (anonymous namespace)::handle_connection<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > > (context=..., env=..., stream=..., timeout=..., header_limit=16384, buffer=..., is_ssl=false, pause_mutex=..., scheduler=0x5642adb98f08, uri_prefix="", ec=..., yield=...) at ../src/rgw/rgw_asio_frontend.cc:284 #14 0x00005642a5856db0 in operator() (__closure=__closure@entry=0x5642b321a938, yield=...) at ../src/rgw/rgw_asio_frontend.cc:1055 #15 0x00005642a5856f69 in operator() (__closure=__closure@entry=0x7f416ae2ff18, c=...) 
at ../src/spawn/include/spawn/impl/spawn.hpp:390 #16 0x00005642a5857166 in std::__invoke_impl<boost::context::continuation, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()( )::<lambda(boost::context::continuation&&)>&, boost::context::continuation> (__f=...) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/invoke.h:61 #17 std::__invoke<spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous n amespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)>&, boost::cont ext::continuation> (__fn=...) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/invoke.h:97 #18 std::invoke<spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous nam espace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)>&, boost::contex t::continuation> (__fn=...) 
at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/functional:98 #19 boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost ::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yield_context)>, boost::context::basic_p rotected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)> >::run (fctx=<optimized out>, this=0x7f416ae2ff00) at boost/include/boost/context/continuation_fcontext.hpp:143 #20 boost::context::detail::context_entry<boost::context::detail::record<boost::context::continuation, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<boost::asio::executor_binder <void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> > >, (anonymous namespace)::AsioFrontend::accept((anonymous namespace)::AsioFrontend::Listener&, boost::system::error_code)::<lambda(yi eld_context)>, boost::context::basic_protected_fixedsize_stack<boost::context::stack_traits> >::operator()()::<lambda(boost::context::continuation&&)> > >(boost::context::detail::transfer_t) (t=...) at boost/include/boost/context/continuation_fcontext.hpp:80 #21 0x00005642a692d79f in make_fcontext () #22 0x0000000000000000 in ?? ()
Updated by Yuval Lifshitz about 1 year ago
Neither crash seems related to the notification code. They may be a result of the fact that the notifications add an extra delay to the execution of the ops.
Updated by Yuval Lifshitz about 1 year ago
a different backtrace happening in teuthology is detailed here: http://qa-proxy.ceph.com/teuthology/ivancich-2023-01-20_23:59:03-rgw-wip-eric-testing-1-distro-default-smithi/7132082/
2023-01-22T02:12:45.597 INFO:tasks.rgw.client.0.smithi043.stdout:*** Caught signal (Segmentation fault) ** 2023-01-22T02:12:45.598 INFO:tasks.rgw.client.0.smithi043.stdout: in thread 7fcc8d9d8700 thread_name:radosgw 2023-01-22T02:12:45.603 INFO:tasks.rgw.client.0.smithi043.stdout: ceph version 18.0.0-1894-g162efd40 (162efd40ce0547aa435aa291b08d4821869d4cb6) reef (dev) 2023-01-22T02:12:45.603 INFO:tasks.rgw.client.0.smithi043.stdout: 1: /lib64/libpthread.so.0(+0x12cf0) [0x7fcd2406ecf0] 2023-01-22T02:12:45.604 INFO:tasks.rgw.client.0.smithi043.stdout: 2: (std::__detail::_NFA<std::__cxx11::regex_traits<char> >::_M_insert_subexpr_end()+0x38) [0x55f4d395e5a8] 2023-01-22T02:12:45.604 INFO:tasks.rgw.client.0.smithi043.stdout: 3: (std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_atom()+0x5bf) [0x55f4d396689f] 2023-01-22T02:12:45.604 INFO:tasks.rgw.client.0.smithi043.stdout: 4: (std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative()+0x140) [0x55f4d3966e40] 2023-01-22T02:12:45.604 INFO:tasks.rgw.client.0.smithi043.stdout: 5: (std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_disjunction()+0x3a) [0x55f4d396719a] 2023-01-22T02:12:45.604 INFO:tasks.rgw.client.0.smithi043.stdout: 6: (std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler(char const*, char const*, std::locale const&, std::regex_constants::syntax_option_type)+0x3ae) [0x55f4d396781e] 2023-01-22T02:12:45.605 INFO:tasks.rgw.client.0.smithi043.stdout: 7: (rgw::parse_url_authority(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&)+0x1fa) [0x55f4d3f9fc7a] 2023-01-22T02:12:45.605 INFO:tasks.rgw.client.0.smithi043.stdout: 8: 
(rgw::kafka::Manager::connect(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool, bool, boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>)+0x2b0) [0x55f4d3dd0db0] 2023-01-22T02:12:45.605 INFO:tasks.rgw.client.0.smithi043.stdout: 9: (rgw::kafka::connect(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool, bool, boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>)+0x5c) [0x55f4d3dcdcec] 2023-01-22T02:12:45.605 INFO:tasks.rgw.client.0.smithi043.stdout: 10: (RGWPubSubKafkaEndpoint::RGWPubSubKafkaEndpoint(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWHTTPArgs const&, ceph::common::CephContext*)+0x152) [0x55f4d416ffd2] 2023-01-22T02:12:45.606 INFO:tasks.rgw.client.0.smithi043.stdout: 11: (RGWPubSubEndpoint::create(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWHTTPArgs const&, ceph::common::CephContext*)+0x97) [0x55f4d416ee77] 2023-01-22T02:12:45.606 INFO:tasks.rgw.client.0.smithi043.stdout: 12: (rgw::notify::publish_commit(rgw::sal::Object*, unsigned long, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw::notify::EventType, rgw::notify::reservation_t&, DoutPrefixProvider 
const*)+0xbb2) [0x55f4d40a0582] 2023-01-22T02:12:45.606 INFO:tasks.rgw.client.0.smithi043.stdout: 13: (rgw::sal::RadosNotification::publish_commit(DoutPrefixProvider const*, unsigned long, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x2f) [0x55f4d3ce315f] 2023-01-22T02:12:45.606 INFO:tasks.rgw.client.0.smithi043.stdout: 14: (RGWPutObj::execute(optional_yield)+0x33c5) [0x55f4d3afc515] 2023-01-22T02:12:45.606 INFO:tasks.rgw.client.0.smithi043.stdout: 15: (rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, optional_yield, rgw::sal::Driver*, bool)+0xd91) [0x55f4d38eb031] 2023-01-22T02:12:45.607 INFO:tasks.rgw.client.0.smithi043.stdout: 16: (process_request(RGWProcessEnv const&, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWRestfulIO*, optional_yield, rgw::dmclock::Scheduler*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >*, int*)+0x2b5c) [0x55f4d38ee61c] 2023-01-22T02:12:45.607 INFO:tasks.rgw.client.0.smithi043.stdout: 17: radosgw(+0x64a1e1) [0x55f4d38281e1] 2023-01-22T02:12:45.607 INFO:tasks.rgw.client.0.smithi043.stdout: 18: radosgw(+0x64ae04) [0x55f4d3828e04] 2023-01-22T02:12:45.608 INFO:tasks.rgw.client.0.smithi043.stdout: 19: make_fcontext()
Updated by Yuval Lifshitz about 1 year ago
A crash was seen in another place in the code when running the bucket notification test. In RGWDeleteMultiObj::execute() there is a crash at line 7097:
for (iter = multi_delete->objects.begin();
7087│       iter != multi_delete->objects.end();
7088│       ++iter) {
7089│   rgw_obj_key obj_key = *iter;
7090│   if (y && max_aio > 1) {
7091│     wait_flush(y, &*formatter_flush_cond, [&aio_count, max_aio] {
7092│       return aio_count < max_aio;
7093│     });
7094│     aio_count++;
7095│     spawn::spawn(y.get_yield_context(), [this, &y, &aio_count, obj_key, &formatter_flush_cond] (yield_context yield) {
7096│       handle_individual_object(obj_key, optional_yield { y.get_io_context(), yield }, &*formatter_flush_cond);
7097├───────> aio_count--;
7098│     });
7099│   } else {
7100│     handle_individual_object(obj_key, y, &*formatter_flush_cond);
7101│   }
7102│ }
7103│ if (formatter_flush_cond) {
7104│   wait_flush(y, &*formatter_flush_cond, [this, n=multi_delete->objects.size()] {
7105│     return n == ops_log_entries.size();
7106│   });
7107│ }
p aio_count $1 = (uint32_t &) <error reading variable: Cannot access memory at address 0x7fe62cbb4634>
This is not due to the issue from: https://tracker.ceph.com/issues/58793 — I tried with that fix, and it still crashed in the same place.
Updated by Casey Bodley 4 months ago
Yuval Lifshitz wrote:
a different backtrace happening in teuthology is detailed here: http://qa-proxy.ceph.com/teuthology/ivancich-2023-01-20_23:59:03-rgw-wip-eric-testing-1-distro-default-smithi/7132082/
[...]
saw another example of this crash in http://qa-proxy.ceph.com/teuthology/cbodley-2023-12-15_01:06:56-rgw-wip-cbodley-testing-distro-default-smithi/7492877/teuthology.log:
2023-12-15T01:43:58.253 DEBUG:teuthology.orchestra.run.smithi027:bucket notification tests against different endpoints> BNTESTS_CONF=/home/ubuntu/cephtest/ceph/src/test/rgw/bucket_notification/bn-tests.client.0.conf /home/ubuntu/cephtest/ceph/src/test/rgw/bucket_notification/virtualenv/bin/python -m nose -s /home/ubuntu/cephtest/ceph/src/test/rgw/bucket_notification/test_bn.py -v -a kafka_test 2023-12-15T01:43:59.062 INFO:tasks.rgw.client.0.smithi027.stdout:*** Caught signal (Segmentation fault) ** 2023-12-15T01:43:59.063 INFO:tasks.rgw.client.0.smithi027.stdout: in thread 7f7c2e2ba640 thread_name:radosgw 2023-12-15T01:43:59.064 INFO:tasks.rgw.client.0.smithi027.stdout: ceph version 19.0.0-176-g4a1cf0b4 (4a1cf0b45eccd2f4955bf9c26d3227564def63e5) reef (dev) 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 1: /lib64/libc.so.6(+0x54db0) [0x7f7cad654db0] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 2: radosgw(+0x4d299c) [0x55de7572699c] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 3: radosgw(+0x4d1bcd) [0x55de75725bcd] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 4: radosgw(+0x4d1bcd) [0x55de75725bcd] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 5: radosgw(+0x4b9e0a) [0x55de7570de0a] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 6: radosgw(+0x4bccb9) [0x55de75710cb9] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 7: (rgw::parse_url_authority(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&)+0x1c2) [0x55de75986882] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 8: radosgw(+0x88e5b9) [0x55de75ae25b9] 2023-12-15T01:43:59.065 
INFO:tasks.rgw.client.0.smithi027.stdout: 9: (RGWPubSubEndpoint::create(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWHTTPArgs const&, ceph::common::CephContext*)+0x303) [0x55de759bbe03] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 10: (rgw::notify::publish_commit(rgw::sal::Object*, unsigned long, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw::notify::EventType, rgw::notify::reservation_t&, DoutPrefixProvider const*)+0x76c) [0x55de759ada2c] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 11: (rgw::sal::RadosNotification::publish_commit(DoutPrefixProvider const*, unsigned long, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x2f) [0x55de75a4e95f] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 12: (RGWPutObj::execute(optional_yield)+0x36ea) [0x55de7582c65a] 2023-12-15T01:43:59.065 INFO:tasks.rgw.client.0.smithi027.stdout: 13: (rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, optional_yield, rgw::sal::Driver*, bool)+0xa72) [0x55de756d0df2] 2023-12-15T01:43:59.066 INFO:tasks.rgw.client.0.smithi027.stdout: 14: (process_request(RGWProcessEnv const&, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWRestfulIO*, optional_yield, rgw::dmclock::Scheduler*, std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >*, int*)+0xf7d) [0x55de756d41ed] 2023-12-15T01:43:59.066 INFO:tasks.rgw.client.0.smithi027.stdout: 15: radosgw(+0xc64f50) [0x55de75eb8f50] 2023-12-15T01:43:59.066 INFO:tasks.rgw.client.0.smithi027.stdout: 16: radosgw(+0x3ce426) [0x55de75622426] 2023-12-15T01:43:59.066 INFO:tasks.rgw.client.0.smithi027.stdout: 17: make_fcontext()
Updated by Yuval Lifshitz 3 months ago
a different crash in the notifications suite (code does not seem related to notifications):
ceph version 19.0.0-830-g01624f03 (01624f03150a05074975b8090eb8b1e8d78efd04) squid (dev) 1: /lib64/libc.so.6(+0x54db0) [0x7f1d0ea54db0] 2: radosgw(+0xcb9ca2) [0x559924168ca2] 3: radosgw(+0xcb9cae) [0x559924168cae] 4: (RGWRados::cls_obj_prepare_op(DoutPrefixProvider const*, RGWRados::BucketShard&, RGWModifyOp, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, rgw_obj&, unsigned short, optional_yield, rgw_zone_set*, bool)+0x6a2) [0x559923c6a242] 5: radosgw(+0x7d22c6) [0x559923c812c6] 6: (RGWRados::Bucket::UpdateIndex::guard_reshard(DoutPrefixProvider const*, rgw_obj const&, RGWRados::BucketShard**, std::function<int (RGWRados::BucketShard*)>, optional_yield)+0x9c) [0x559923c55e6c] 7: (RGWRados::Bucket::UpdateIndex::prepare(DoutPrefixProvider const*, RGWModifyOp, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const*, optional_yield, bool)+0x113) [0x559923c56723] 8: (RGWRados::Object::Write::_do_write_meta(unsigned long, unsigned long, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >&, bool, bool, void*, req_context const&, bool)+0xd1a) [0x559923c47cea] 9: (RGWRados::Object::Write::write_meta(unsigned long, unsigned long, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >&, req_context const&, bool)+0x55b) [0x559923c48dcb] 10: (rgw::putobj::AtomicObjectProcessor::complete(unsigned long, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >&, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >, char const*, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const*, rgw_zone_set*, bool*, req_context const&, unsigned int)+0x2f7) [0x559923c2b387] 11: (RGWPutObj::execute(optional_yield)+0x366f) [0x559923a9168f] 12: (rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, optional_yield, rgw::sal::Driver*, bool)+0xa72) [0x559923932cc2] 13: (process_request(RGWProcessEnv const&, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWRestfulIO*, optional_yield, rgw::dmclock::Scheduler*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >*, int*)+0xf7d) [0x5599239360ed] 14: radosgw(+0xc79140) [0x559924128140] 15: radosgw(+0x3e6924) [0x559923895924] 16: make_fcontext() NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.