Actions
Bug #52800
Closed: OSD down makes all RGWs crash and the cluster unavailable, causing a production outage
Status:
Duplicate
Priority:
Normal
Assignee:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Description
This is the backtrace that I got. When an OSD goes down, it kills all the RGWs in the cluster.
What I can see is that the OSD read/write latency goes high and they just die.
Once the OSD is down, it kills all the RGWs in the cluster after 2-6 minutes (see attached screenshot).
{
"backtrace": [
"(()+0x12b20) [0x7f3281d1db20]",
"(ceph::buffer::v15_2_0::ptr::ptr(ceph::buffer::v15_2_0::ptr const&)+0x1b) [0x7f328c289d9b]",
"(ceph::buffer::v15_2_0::ptr_node::cloner::operator()(ceph::buffer::v15_2_0::ptr_node const&)+0x2e) [0x7f328c28d0ee]",
"(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >* std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_M_copy<std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_Reuse_or_alloc_node>(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > const*, std::_Rb_tree_node_base*, std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_Reuse_or_alloc_node&)+0x4c3) [0x7f328ca83bd3]",
"(std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::operator=(std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > const&)+0x93) [0x7f328ca83e83]",
"(ObjectCache::get(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, ObjectCacheInfo&, unsigned int, rgw_cache_entry_info*)+0xe39) [0x7f328cb9c989]",
"(RGWSI_SysObj_Cache::read(RGWSysObjectCtxBase&, RGWSI_SysObj_Obj_GetObjState&, RGWObjVersionTracker*, rgw_raw_obj const&, ceph::buffer::v15_2_0::list*, long, long, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, bool, rgw_cache_entry_info*, boost::optional<obj_version>, optional_yield)+0x31d) [0x7f328cf558bd]",
"(RGWSI_SysObj::Obj::ROp::read(long, long, ceph::buffer::v15_2_0::list*, optional_yield)+0xce) [0x7f328cb0a65e]",
"(rgw_get_system_obj(RGWSysObjectCtx&, rgw_pool const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, ceph::buffer::v15_2_0::list&, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, rgw_cache_entry_info*, boost::optional<obj_version>)+0x1bd) [0x7f328ce89e5d]",
"(RGWSI_MetaBackend_SObj::get_entry(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWSI_MetaBackend::GetParams&, RGWObjVersionTracker*, optional_yield)+0xe7) [0x7f328cf45ea7]",
"(RGWSI_User_RADOS::read_user_info(RGWSI_MetaBackend::Context*, rgw_user const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, rgw_cache_entry_info*, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, optional_yield)+0x254) [0x7f328cf5e114]",
"(RGWSI_User_RADOS::get_user_info_from_index(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw_pool const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield)+0xcab) [0x7f328cf6063b]",
"(RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield)+0x56) [0x7f328cf60a96]",
"(()+0x924ccf) [0x7f328ce92ccf]",
"(()+0x9d581e) [0x7f328cf4381e]",
"(RGWSI_MetaBackend_SObj::call(std::optional<std::variant<RGWSI_MetaBackend_CtxParams_SObj> >, std::function<int (RGWSI_MetaBackend::Context*)>)+0x9e) [0x7f328cf4678e]",
"(RGWSI_MetaBackend_Handler::call(std::optional<std::variant<RGWSI_MetaBackend_CtxParams_SObj> >, std::function<int (RGWSI_MetaBackend_Handler::Op*)>)+0x5f) [0x7f328cf4364f]",
"(RGWSI_MetaBackend_Handler::call(std::function<int (RGWSI_MetaBackend_Handler::Op*)>)+0x78) [0x7f328cea12a8]",
"(RGWUserCtl::get_info_by_access_key(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo*, optional_yield, RGWUserCtl::GetParams const&)+0x94) [0x7f328ce94144]",
"(rgw_get_user_info_by_access_key(RGWUserCtl*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo&, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*)+0x40) [0x7f328ce941e0]",
"(rgw::auth::s3::LocalEngine::authenticate(DoutPrefixProvider const*, boost::basic_string_view<char, std::char_traits<char> > const&, boost::basic_string_view<char, std::char_traits<char> > const&, boost::basic_string_view<char, std::char_traits<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::function<basic_sstring<char, unsigned short, (unsigned short)65> (ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)> const&, std::function<std::shared_ptr<rgw::auth::Completer> (boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > const&)> const&, req_state const*) const+0xb9) [0x7f328ce64879]",
"(rgw::auth::s3::AWSEngine::authenticate(DoutPrefixProvider const*, req_state const*) const+0x11c) [0x7f328ce3c5ac]",
"(rgw::auth::Strategy::authenticate(DoutPrefixProvider const*, req_state const*) const+0x24a) [0x7f328cb41e4a]",
"(rgw::auth::Strategy::authenticate(DoutPrefixProvider const*, req_state const*) const+0x24a) [0x7f328cb41e4a]",
"(rgw::auth::Strategy::apply(DoutPrefixProvider const*, rgw::auth::Strategy const&, req_state*)+0x3d) [0x7f328cb428dd]",
"(RGW_Auth_S3::authorize(DoutPrefixProvider const*, rgw::sal::RGWRadosStore*, rgw::auth::StrategyRegistry const&, req_state*)+0x8b) [0x7f328ce5fd0b]",
"(process_request(rgw::sal::RGWRadosStore*, RGWREST*, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw::auth::StrategyRegistry const&, RGWRestfulIO*, OpsLogSocket*, optional_yield, rgw::dmclock::Scheduler*, int*)+0x1562) [0x7f328ca34e02]",
"(()+0x417556) [0x7f328c985556]",
"(()+0x418e93) [0x7f328c986e93]",
"(make_fcontext()+0x2f) [0x7f328d041f9f]"
],
"ceph_version": "15.2.10",
"crash_id": "2021-10-01T11:07:12.124286Z_36aa0864-8446-4111-b511-1a21f1e73327",
"entity_name": "client.rgw.mon-2s03.rgw0",
"os_id": "centos",
"os_name": "CentOS Linux",
"os_version": "8",
"os_version_id": "8",
"process_name": "radosgw",
"stack_sig": "f6c2aef1d0ab198dcd7b51d83bd958392ee0af50c91fce12fbc9c6bbf540d141",
"timestamp": "2021-10-01T11:07:12.124286Z",
"utsname_hostname": "mon-2s03",
"utsname_machine": "x86_64",
"utsname_release": "4.18.0-240.15.1.el8_3.x86_64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Mon Mar 1 17:16:16 UTC 2021"
}
Files
Updated by Casey Bodley over 2 years ago
- Is duplicate of Bug #51927: crash: RGWSI_MetaBackend_SObj added
Updated by Yaarit Hatuka over 2 years ago
- Status changed from New to Duplicate
Changing status to 'Duplicate' to match this issue's 'Duplicates' relation.
Actions