Actions
Bug #61363
openceph mds keeps crashing with ceph_assert(auth_pins == 0)
% Done:
0%
Source:
other
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(FS):
MDS, qa-suite
Labels (FS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Description of problem (please be as detailed as possible and provide log snippets): The Ceph MDS daemons keep crashing again and again. HEALTH_WARN 8 daemons have recently crashed [WRN] RECENT_CRASH: 8 daemons have recently crashed mds.ocs-storagecluster-cephfilesystem-b crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5656c5b6wt8w7 at 2023-05-12T20:01:53.163715Z mds.ocs-storagecluster-cephfilesystem-b crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5656c5b6wt8w7 at 2023-05-12T20:03:36.870549Z mds.ocs-storagecluster-cephfilesystem-a crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-795cc7d942glj at 2023-05-12T20:03:03.531284Z mds.ocs-storagecluster-cephfilesystem-a crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-795cc7d942glj at 2023-05-12T20:04:10.158712Z mds.ocs-storagecluster-cephfilesystem-b crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5656c5b6wt8w7 at 2023-05-12T20:08:14.985327Z mds.ocs-storagecluster-cephfilesystem-b crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5656c5b6wt8w7 at 2023-05-12T20:11:56.915029Z mds.ocs-storagecluster-cephfilesystem-a crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-795cc7d942glj at 2023-05-12T20:09:51.297969Z mds.ocs-storagecluster-cephfilesystem-a crashed on host rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-795cc7d942glj at 2023-05-12T20:12:30.570734Z sh-4.4$ ceph crash ls ID ENTITY NEW 2023-03-09T05:00:43.115249Z_1d507426-ae1f-4369-a3be-d3d09df9bdff mds.ocs-storagecluster-cephfilesystem-a 2023-03-24T14:47:46.663452Z_37b8cd06-a7f1-43bf-bc5a-85c849139ac4 mds.ocs-storagecluster-cephfilesystem-a 2023-03-31T17:20:49.710179Z_587d7c64-464c-447f-862c-86bbb610816e mds.ocs-storagecluster-cephfilesystem-b 2023-04-22T13:23:27.685475Z_6ccb7993-0949-497b-ac67-6331a7e1c42a mds.ocs-storagecluster-cephfilesystem-b 2023-05-05T06:56:54.609947Z_f78461a0-383e-4b2f-882a-d7523403af57 mds.ocs-storagecluster-cephfilesystem-a 
2023-05-12T20:01:53.163715Z_6a13746b-2bda-417f-b86f-05e9dd53fe49 mds.ocs-storagecluster-cephfilesystem-b * 2023-05-12T20:03:03.531284Z_5102cc52-de1b-4a05-a70c-9b8162c86cf3 mds.ocs-storagecluster-cephfilesystem-a * 2023-05-12T20:03:36.870549Z_b520a78d-2da9-4b1a-a87c-7198eb0542d7 mds.ocs-storagecluster-cephfilesystem-b * 2023-05-12T20:04:10.158712Z_7d1dad4f-b8f9-48ed-a32f-8102b34a7504 mds.ocs-storagecluster-cephfilesystem-a * 2023-05-12T20:08:14.985327Z_74e3fc4f-21ad-4749-a5a7-0400ce534d55 mds.ocs-storagecluster-cephfilesystem-b * 2023-05-12T20:09:51.297969Z_1ab48ef4-b71d-4161-b850-920a5b1f0657 mds.ocs-storagecluster-cephfilesystem-a * 2023-05-12T20:11:56.915029Z_759c8ac1-0cb1-451d-93a1-e7685c1e10d3 mds.ocs-storagecluster-cephfilesystem-b * 2023-05-12T20:12:30.570734Z_ee9075f7-fe92-46bf-af1b-421d93874bcf mds.ocs-storagecluster-cephfilesystem-a * sh-4.4$ ceph crash stat 13 crashes recorded 5 older than 1 days old: 2023-03-09T05:00:43.115249Z_1d507426-ae1f-4369-a3be-d3d09df9bdff 2023-03-24T14:47:46.663452Z_37b8cd06-a7f1-43bf-bc5a-85c849139ac4 2023-03-31T17:20:49.710179Z_587d7c64-464c-447f-862c-86bbb610816e 2023-04-22T13:23:27.685475Z_6ccb7993-0949-497b-ac67-6331a7e1c42a 2023-05-05T06:56:54.609947Z_f78461a0-383e-4b2f-882a-d7523403af57 5 older than 3 days old: 2023-03-09T05:00:43.115249Z_1d507426-ae1f-4369-a3be-d3d09df9bdff 2023-03-24T14:47:46.663452Z_37b8cd06-a7f1-43bf-bc5a-85c849139ac4 2023-03-31T17:20:49.710179Z_587d7c64-464c-447f-862c-86bbb610816e 2023-04-22T13:23:27.685475Z_6ccb7993-0949-497b-ac67-6331a7e1c42a 2023-05-05T06:56:54.609947Z_f78461a0-383e-4b2f-882a-d7523403af57 5 older than 7 days old: 2023-03-09T05:00:43.115249Z_1d507426-ae1f-4369-a3be-d3d09df9bdff 2023-03-24T14:47:46.663452Z_37b8cd06-a7f1-43bf-bc5a-85c849139ac4 2023-03-31T17:20:49.710179Z_587d7c64-464c-447f-862c-86bbb610816e 2023-04-22T13:23:27.685475Z_6ccb7993-0949-497b-ac67-6331a7e1c42a 2023-05-05T06:56:54.609947Z_f78461a0-383e-4b2f-882a-d7523403af57 sh-4.4$ ceph crash info 
2023-05-12T20:01:53.163715Z_6a13746b-2bda-417f-b86f-05e9dd53fe49 { "assert_condition": "auth_pins == 0", "assert_file": "/builddir/build/BUILD/ceph-16.2.7/src/mds/CDir.cc", "assert_func": "void CDir::finish_old_fragment(MDSContext::vec&, bool)", "assert_line": 965, "assert_msg": "/builddir/build/BUILD/ceph-16.2.7/src/mds/CDir.cc: In function 'void CDir::finish_old_fragment(MDSContext::vec&, bool)' thread 7ff5fd20d700 time 2023-05-12T20:01:53.159673+0000\n/builddir/build/BUILD/ceph-16.2.7/src/mds/CDir.cc: 965: FAILED ceph_assert(auth_pins == 0)\n", "assert_thread_name": "ms_dispatch", "backtrace": [ "/lib64/libpthread.so.0(+0x12ce0) [0x7ff604a14ce0]", "gsignal()", "abort()", "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x7ff605a26d4f]", "/usr/lib64/ceph/libceph-common.so.2(+0x276f18) [0x7ff605a26f18]", "(CDir::finish_old_fragment(std::vector<MDSContext*, std::allocator<MDSContext*> >&, bool)+0x1da) [0x55cca25a222a]", "(CDir::split(int, std::vector<CDir*, std::allocator<CDir*> >*, std::vector<MDSContext*, std::allocator<MDSContext*> >&, bool)+0x1d37) [0x55cca25a3f87]", "(MDCache::adjust_dir_fragments(CInode*, std::vector<CDir*, std::allocator<CDir*> > const&, frag_t, int, std::vector<CDir*, std::allocator<CDir*> >*, std::vector<MDSContext*, std::allocator<MDSContext*> >&, bool)+0x278) [0x55cca2480238]", "(MDCache::dispatch_fragment_dir(boost::intrusive_ptr<MDRequestImpl>&)+0x8df) [0x55cca248720f]", "(MDCache::dispatch_request(boost::intrusive_ptr<MDRequestImpl>&)+0xa5) [0x55cca2494c85]", "(MDSContext::complete(int)+0x203) [0x55cca2651263]", "(void finish_contexts<std::vector<MDSContext*, std::allocator<MDSContext*> > >(ceph::common::CephContext*, std::vector<MDSContext*, std::allocator<MDSContext*> >&, int)+0x8d) [0x55cca2346cad]", "(Locker::eval(CInode*, int, bool)+0x4d8) [0x55cca25267e8]", "(Locker::handle_client_caps(boost::intrusive_ptr<MClientCaps const> const&)+0x204c) [0x55cca253691c]", 
"(Locker::dispatch(boost::intrusive_ptr<Message const> const&)+0x104) [0x55cca2538524]", "(MDSRank::handle_message(boost::intrusive_ptr<Message const> const&)+0xbcc) [0x55cca234d6ec]", "(MDSRank::_dispatch(boost::intrusive_ptr<Message const> const&, bool)+0x7bb) [0x55cca235008b]", "(MDSRankDispatcher::ms_dispatch(boost::intrusive_ptr<Message const> const&)+0x55) [0x55cca2350685]", "(MDSDaemon::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0x108) [0x55cca23402a8]", "(DispatchQueue::entry()+0x126a) [0x7ff605c6c2aa]", "(DispatchQueue::DispatchThread::entry()+0x11) [0x7ff605d1df91]", "/lib64/libpthread.so.0(+0x81cf) [0x7ff604a0a1cf]", "clone()" ], "ceph_version": "16.2.7-126.el8cp", "crash_id": "2023-05-12T20:01:53.163715Z_6a13746b-2bda-417f-b86f-05e9dd53fe49", "entity_name": "mds.ocs-storagecluster-cephfilesystem-b", "os_id": "rhel", "os_name": "Red Hat Enterprise Linux", "os_version": "8.6 (Ootpa)", "os_version_id": "8.6", "process_name": "ceph-mds", "stack_sig": "e5ef752f8581195fddadc2d5fa4c741aeee93fa864e6e8a5ed8fb68d1535b7ef", "timestamp": "2023-05-12T20:01:53.163715Z", "utsname_hostname": "rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5656c5b6wt8w7", "utsname_machine": "x86_64", "utsname_release": "4.18.0-305.76.1.el8_4.x86_64", "utsname_sysname": "Linux", "utsname_version": "#1 SMP Thu Jan 12 10:05:36 EST 2023" }
Actions