Project

General

Profile

Actions

Bug #52800

closed

An OSD going down makes all the RGWs crash and the cluster unavailable, causing a production outage

Added by Ist Gab over 2 years ago. Updated over 2 years ago.

Status:
Duplicate
Priority:
Normal
Assignee:
-
Target version:
-
% Done:

0%

Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

This is the backtrace that I got. When an OSD goes down, it kills all the RGWs in the cluster.
What I can see is that the OSD read/write latency goes high and then they just die.
Once the OSD is down, it kills all the RGWs in the cluster after 2-6 minutes (see the attached screenshot).

{
    "backtrace": [
        "(()+0x12b20) [0x7f3281d1db20]",
        "(ceph::buffer::v15_2_0::ptr::ptr(ceph::buffer::v15_2_0::ptr const&)+0x1b) [0x7f328c289d9b]",
        "(ceph::buffer::v15_2_0::ptr_node::cloner::operator()(ceph::buffer::v15_2_0::ptr_node const&)+0x2e) [0x7f328c28d0ee]",
        "(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >* std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_M_copy<std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_Reuse_or_alloc_node>(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > const*, std::_Rb_tree_node_base*, std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::_Reuse_or_alloc_node&)+0x4c3) [0x7f328ca83bd3]",
        "(std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >::operator=(std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > const&)+0x93) [0x7f328ca83e83]",
        "(ObjectCache::get(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, ObjectCacheInfo&, unsigned int, rgw_cache_entry_info*)+0xe39) [0x7f328cb9c989]",
        "(RGWSI_SysObj_Cache::read(RGWSysObjectCtxBase&, RGWSI_SysObj_Obj_GetObjState&, RGWObjVersionTracker*, rgw_raw_obj const&, ceph::buffer::v15_2_0::list*, long, long, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, bool, rgw_cache_entry_info*, boost::optional<obj_version>, optional_yield)+0x31d) [0x7f328cf558bd]",
        "(RGWSI_SysObj::Obj::ROp::read(long, long, ceph::buffer::v15_2_0::list*, optional_yield)+0xce) [0x7f328cb0a65e]",
        "(rgw_get_system_obj(RGWSysObjectCtx&, rgw_pool const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, ceph::buffer::v15_2_0::list&, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, rgw_cache_entry_info*, boost::optional<obj_version>)+0x1bd) [0x7f328ce89e5d]",
        "(RGWSI_MetaBackend_SObj::get_entry(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWSI_MetaBackend::GetParams&, RGWObjVersionTracker*, optional_yield)+0xe7) [0x7f328cf45ea7]",
        "(RGWSI_User_RADOS::read_user_info(RGWSI_MetaBackend::Context*, rgw_user const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, rgw_cache_entry_info*, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > >*, optional_yield)+0x254) [0x7f328cf5e114]",
        "(RGWSI_User_RADOS::get_user_info_from_index(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw_pool const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield)+0xcab) [0x7f328cf6063b]",
        "(RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo*, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*, optional_yield)+0x56) [0x7f328cf60a96]",
        "(()+0x924ccf) [0x7f328ce92ccf]",
        "(()+0x9d581e) [0x7f328cf4381e]",
        "(RGWSI_MetaBackend_SObj::call(std::optional<std::variant<RGWSI_MetaBackend_CtxParams_SObj> >, std::function<int (RGWSI_MetaBackend::Context*)>)+0x9e) [0x7f328cf4678e]",
        "(RGWSI_MetaBackend_Handler::call(std::optional<std::variant<RGWSI_MetaBackend_CtxParams_SObj> >, std::function<int (RGWSI_MetaBackend_Handler::Op*)>)+0x5f) [0x7f328cf4364f]",
        "(RGWSI_MetaBackend_Handler::call(std::function<int (RGWSI_MetaBackend_Handler::Op*)>)+0x78) [0x7f328cea12a8]",
        "(RGWUserCtl::get_info_by_access_key(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo*, optional_yield, RGWUserCtl::GetParams const&)+0x94) [0x7f328ce94144]",
        "(rgw_get_user_info_by_access_key(RGWUserCtl*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWUserInfo&, RGWObjVersionTracker*, std::chrono::time_point<ceph::time_detail::real_clock, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> > >*)+0x40) [0x7f328ce941e0]",
        "(rgw::auth::s3::LocalEngine::authenticate(DoutPrefixProvider const*, boost::basic_string_view<char, std::char_traits<char> > const&, boost::basic_string_view<char, std::char_traits<char> > const&, boost::basic_string_view<char, std::char_traits<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::function<basic_sstring<char, unsigned short, (unsigned short)65> (ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)> const&, std::function<std::shared_ptr<rgw::auth::Completer> (boost::optional<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > const&)> const&, req_state const*) const+0xb9) [0x7f328ce64879]",
        "(rgw::auth::s3::AWSEngine::authenticate(DoutPrefixProvider const*, req_state const*) const+0x11c) [0x7f328ce3c5ac]",
        "(rgw::auth::Strategy::authenticate(DoutPrefixProvider const*, req_state const*) const+0x24a) [0x7f328cb41e4a]",
        "(rgw::auth::Strategy::authenticate(DoutPrefixProvider const*, req_state const*) const+0x24a) [0x7f328cb41e4a]",
        "(rgw::auth::Strategy::apply(DoutPrefixProvider const*, rgw::auth::Strategy const&, req_state*)+0x3d) [0x7f328cb428dd]",
        "(RGW_Auth_S3::authorize(DoutPrefixProvider const*, rgw::sal::RGWRadosStore*, rgw::auth::StrategyRegistry const&, req_state*)+0x8b) [0x7f328ce5fd0b]",
        "(process_request(rgw::sal::RGWRadosStore*, RGWREST*, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rgw::auth::StrategyRegistry const&, RGWRestfulIO*, OpsLogSocket*, optional_yield, rgw::dmclock::Scheduler*, int*)+0x1562) [0x7f328ca34e02]",
        "(()+0x417556) [0x7f328c985556]",
        "(()+0x418e93) [0x7f328c986e93]",
        "(make_fcontext()+0x2f) [0x7f328d041f9f]" 
    ],
    "ceph_version": "15.2.10",
    "crash_id": "2021-10-01T11:07:12.124286Z_36aa0864-8446-4111-b511-1a21f1e73327",
    "entity_name": "client.rgw.mon-2s03.rgw0",
    "os_id": "centos",
    "os_name": "CentOS Linux",
    "os_version": "8",
    "os_version_id": "8",
    "process_name": "radosgw",
    "stack_sig": "f6c2aef1d0ab198dcd7b51d83bd958392ee0af50c91fce12fbc9c6bbf540d141",
    "timestamp": "2021-10-01T11:07:12.124286Z",
    "utsname_hostname": "mon-2s03",
    "utsname_machine": "x86_64",
    "utsname_release": "4.18.0-240.15.1.el8_3.x86_64",
    "utsname_sysname": "Linux",
    "utsname_version": "#1 SMP Mon Mar 1 17:16:16 UTC 2021" 
}

Files

tracker.PNG (168 KB) tracker.PNG osd latency Ist Gab, 10/01/2021 11:36 AM

Related issues 1 (0 open — 1 closed)

Is duplicate of rgw - Bug #51927: crash: RGWSI_MetaBackend_SObj — Resolved — Casey Bodley

Actions
Actions #1

Updated by Casey Bodley over 2 years ago

  • Is duplicate of Bug #51927: crash: RGWSI_MetaBackend_SObj added
Actions #2

Updated by Yaarit Hatuka over 2 years ago

  • Status changed from New to Duplicate

Changing status to 'Duplicate' to match this issue's 'Duplicates' relation.

Actions #3

Updated by J. Eric Ivancich over 2 years ago

  • Pull request ID set to 43581
Actions

Also available in: Atom PDF