Bug #63373
Updated by Abhishek Lekshmanan 6 months ago
On running a multisite setup with Reef (18.2.0) we run into this situation where the client-facing RGWs (the ones with rgw run sync thread = false) deadlock after running a warp s3 benchmark with around 10 clients. The deadlock is reproducible even when rgw run sync thread = true is set. On inspecting one of the RGWs with eu-stack we see that all of the RGW threads are blocked at `RGWDataChangesLog::add_entry`; we see mainly the following 2 patterns in the active threads: <pre> #0 0x00007f9ba0a7d82d __lll_lock_wait #1 0x00007f9ba0a76ad9 __pthread_mutex_lock #2 0x000055d9caf70e4b LazyFIFO::lazy_init(DoutPrefixProvider const*, optional_yield) #3 0x000055d9caf79968 RGWDataChangesFIFO::push(DoutPrefixProvider const*, int, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long...(=) #4 0x000055d9caf6c938 RGWDataChangesLog::add_entry(DoutPrefixProvider const*, RGWBucketInfo const&, rgw::bucket_log_layout_generation const&, int, optional_yield) #5 0x000055d9cac8cba5 add_datalog_entry(DoutPrefixProvider const*, RGWDataChangesLog*, RGWBucketInfo const&, unsigned int, optional_yield) #6 0x000055d9cac9da60 RGWRados::Bucket::UpdateIndex::complete(...) #8 0x000055d9cacc4e67 RGWRados::Object::Write::write_meta(...) 
</pre> And this state: <pre> #0 0x00007f9ba0a7a45c pthread_cond_wait@@GLIBC_2.3.2 #1 0x00007f9b9ff35860 std::condition_variable::wait(std::unique_lock<std::mutex>&) #2 0x000055d9caf6c323 RGWDataChangesLog::add_entry(DoutPrefixProvider const*, RGWBucketInfo const&, rgw::bucket_log_layout_generation const&, int, optional_yield) #3 0x000055d9cac8cba5 add_datalog_entry(DoutPrefixProvider const*, RGWDataChangesLog*, RGWBucketInfo const&, unsigned int, optional_yield) #4 0x000055d9cac9da60 RGWRados::Bucket::UpdateIndex::complete() </pre> On a more detailed analysis via GDB on the lazy_init threads we see: <pre> #0 0x00007f9ba0a7d82d in __lll_lock_wait () from target:/lib64/libpthread.so.0 #1 0x00007f9ba0a76ad9 in pthread_mutex_lock () from target:/lib64/libpthread.so.0 #2 0x000055d9caf70e4b in __gthread_mutex_lock (__mutex=0x55d9cc496de8) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/x86_64-redhat-linux/bits/gthr-default.h:749 #3 std::mutex::lock (this=0x55d9cc496de8) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/std_mutex.h:100 #4 std::unique_lock<std::mutex>::lock (this=0x7f9a54644ec0) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/unique_lock.h:139 #5 std::unique_lock<std::mutex>::unique_lock (__m=..., this=0x7f9a54644ec0) at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/unique_lock.h:69 #6 LazyFIFO::lazy_init (this=this@entry=0x55d9cc496dc0, dpp=dpp@entry=0x55d9d897c000, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:264 #7 0x000055d9caf79968 in LazyFIFO::push (y=..., bl=..., dpp=0x55d9d897c000, this=0x55d9cc496dc0) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:305 #8 RGWDataChangesFIFO::push (this=0x55d9cd28d300, dpp=0x55d9d897c000, index=40, bl=..., y=...) 
at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_datalog.cc:286 #9 0x000055d9caf6c938 in RGWDataChangesLog::add_entry (this=0x55d9cc546200, dpp=0x55d9d897c000, bucket_info=..., gen=..., shard_id=<optimized out>, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/x86_64-redhat-linux-gnu/boost/include/boost/smart_ptr/intrusive_ptr.hpp:197 #10 0x000055d9cac8cba5 in add_datalog_entry(DoutPrefixProvider const*, RGWDataChangesLog*, RGWBucketInfo const&, unsigned int, optional_yield) () at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_iterator.h:1083 </pre> Inspecting the mutex we see: <pre> (gdb) f 6 #6 LazyFIFO::lazy_init (this=this@entry=0x55d9cc496dc0, dpp=dpp@entry=0x55d9d897c000, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:264 264 /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h: No such file or directory. (gdb) p this $77 = (LazyFIFO * const) 0x55d9cc496dc0 (gdb) p this->m $78 = {<std::__mutex_base> = {_M_mutex = {__data = {__lock = 2, __count = 0, __owner = 286845, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000}`\004\000\001", '\000' <repeats 26 times>, __align = 2}}, <No data fields>} </pre> Looking at the thread corresponding to the owner of the mutex: <pre> (gdb) thread 261 [Switching to thread 261 (Thread 0x7f9b16bde700 (LWP 286845))] #0 0x00007f9ba0a7a45c in pthread_cond_wait@@GLIBC_2.3.2 () from target:/lib64/libpthread.so.0 (gdb) bt #0 0x00007f9ba0a7a45c in pthread_cond_wait@@GLIBC_2.3.2 () from target:/lib64/libpthread.so.0 #1 0x00007f9b9ff35860 in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from target:/lib64/libstdc++.so.6 #2 0x000055d9caf6c323 in ceph::common::RefCountedCond::wait (this=0x55d9d044d400) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/common/RefCountedObj.h:111 #3 RGWDataChangesLog::add_entry 
(this=0x55d9cc546200, dpp=<optimized out>, bucket_info=..., gen=..., shard_id=<optimized out>, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_datalog.cc:689 #4 0x000055d9cac8cba5 in add_datalog_entry(DoutPrefixProvider const*, RGWDataChangesLog*, RGWBucketInfo const&, unsigned int, optional_yield) () at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/stl_iterator.h:1083 #5 0x000055d9cac9dcd8 in RGWRados::Bucket::UpdateIndex::complete_del(DoutPrefixProvider const*, long, unsigned long, std::chrono::time_point<ceph::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >&, std::__cxx11::list<rgw_obj_index_key, std::allocator<rgw_obj_index_key> >*, optional_yield) () at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_rados.cc:6374 #6 0x000055d9cacb041b in RGWRados::Object::Delete::delete_obj(optional_yield, DoutPrefixProvider const*) () at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_rados.cc:5347 #7 0x000055d9cad18942 in rgw::sal::RadosObject::RadosDeleteOp::delete_obj (this=0x55d9d0b39a00, dpp=0x55d9d003a6c0, y=...) 
at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_sal_rados.cc:2273 #8 0x000055d9caa3f859 in RGWDeleteMultiObj::handle_individual_object(rgw_obj_key const&, optional_yield, boost::asio::basic_deadline_timer<boost::posix_time::ptime, boost::asio::time_traits<boost::posix_time::ptime>, boost::asio::executor>*) () at /opt/rh/gcc-toolset-11/root/usr/include/c++/11/bits/unique_ptr.h:421 #9 0x000055d9caa404f0 in void boost::context::detail::context_entry<boost::context::detail::record<boost::context::continuation, boost::context::basic_fixedsize_stack<boost::context::stack_traits>, spawn::detail::spawn_helper<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0ul> > >, RGWDeleteMultiObj::execute(optional_yield)::{lambda(spawn::basic_yield_context<boost::asio::executor_binder<void (*)(), boost::asio::strand<boost::asio::io_context::basic_executor_type<std::allocator<void>, 0ul> > > >)#3}, boost::context::basic_fixedsize_stack<boost::context::stack_traits> >::operator()()::{lambda(boost::context::continuation&&)#1}> >(boost::context::detail::transfer_t) () at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/common/async/yield_context.h:40 #10 0x000055d9cb2860af in make_fcontext () #11 0x0000000000000000 in ?? () </pre> Randomly sampling the threads blocked in lazy_init, the owner of the mutex still appears to be thread 286845: <pre> (gdb) thread 596 [Switching to thread 596 (Thread 0x7f9a6f28f700 (LWP 287180))] #0 0x00007f9ba0a7d82d in __lll_lock_wait () from target:/lib64/libpthread.so.0 (gdb) f 6 #6 LazyFIFO::lazy_init (this=this@entry=0x55d9cc496dc0, dpp=dpp@entry=0x55d9d31dab00, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:264 264 /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h: No such file or directory. 
(gdb) p this->m $79 = {<std::__mutex_base> = {_M_mutex = {__data = {__lock = 2, __count = 0, __owner = 286845, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000}`\004\000\001", '\000' <repeats 26 times>, __align = 2}}, <No data fields>} (gdb) thread 595 [Switching to thread 595 (Thread 0x7f9a6fa90700 (LWP 287179))] #0 0x00007f9ba0a7d82d in __lll_lock_wait () from target:/lib64/libpthread.so.0 (gdb) f 6 #6 LazyFIFO::lazy_init (this=this@entry=0x55d9cc496dc0, dpp=dpp@entry=0x55d9d680c200, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:264 264 in /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h (gdb) p this->m $80 = {<std::__mutex_base> = {_M_mutex = {__data = {__lock = 2, __count = 0, __owner = 286845, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000}`\004\000\001", '\000' <repeats 26 times>, __align = 2}}, <No data fields>} (gdb) thread 589 [Switching to thread 589 (Thread 0x7f9a72a96700 (LWP 287173))] #0 0x00007f9ba0a7d82d in __lll_lock_wait () from target:/lib64/libpthread.so.0 (gdb) f 6 #6 LazyFIFO::lazy_init (this=this@entry=0x55d9cc496dc0, dpp=dpp@entry=0x55d9d4462200, y=...) at /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h:264 264 in /usr/src/debug/ceph-18.2.0-1.rh8.cern.x86_64/src/rgw/driver/rados/rgw_log_backing.h (gdb) p this->m $81 = {<std::__mutex_base> = {_M_mutex = {__data = {__lock = 2, __count = 0, __owner = 286845, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000}`\004\000\001", '\000' <repeats 26 times>, __align = 2}}, <No data fields>} </pre> Is this by any chance already fixed in main/master? What steps can we take to help debug this problem further?