Actions
Bug #62356
closedBug #61009: crash: void interval_set<T, C>::erase(T, T, std::function<bool(T, T)>) [with T = inodeno_t; C = std::map]: assert(p->first <= start)
mds: src/include/interval_set.h: 538: FAILED ceph_assert(p->first <= start)
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(FS):
Labels (FS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
sh-4.4$ ceph crash info 2023-07-26T06:01:19.880827Z_c6454cce-07f1-4287-89b4-58563f8485e1 { "archived": "2023-07-27 06:47:41.231060", "assert_condition": "p->first <= start", "assert_file": "/builddir/build/BUILD/ceph-16.2.7/src/include/interval_set.h", "assert_func": "void interval_set<T, C>::erase(T, T, std::function<bool(T, T)>) [with T = inodeno_t; C = std::map]", "assert_line": 538, "assert_msg": "/builddir/build/BUILD/ceph-16.2.7/src/include/interval_set.h: In function 'void interval_set<T, C>::erase(T, T, std::function<bool(T, T)>) [with T = inodeno_t; C = std::map]' thread 7f1137e46700 time 2023-07-26T06:01:19.877866+0000\n/builddir/build/BUILD/ceph-16.2.7/src/include/interval_set.h: 538: FAILED ceph_assert(p->first <= start)\n", "assert_thread_name": "md_log_replay", "backtrace": [ "/lib64/libpthread.so.0(+0x12ce0) [0x7f1146e5cce0]", "gsignal()", "abort()", "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x7f1147e6ed4f]", "/usr/lib64/ceph/libceph-common.so.2(+0x276f18) [0x7f1147e6ef18]", "(interval_set<inodeno_t, std::map>::erase(inodeno_t, inodeno_t, std::function<bool (inodeno_t, inodeno_t)>)+0x2d1) [0x555d442b7b11]", "(interval_set<inodeno_t, std::map>::erase(inodeno_t)+0x33) [0x555d442b7b53]", "(EMetaBlob::replay(MDSRank*, LogSegment*, MDPeerUpdate*)+0x536c) [0x555d445898ec]", "(EUpdate::replay(MDSRank*)+0x40) [0x555d4458ba80]", "(MDLog::_replay_thread()+0xcd1) [0x555d44511db1]", "(MDLog::ReplayThread::entry()+0x11) [0x555d44213941]", "/lib64/libpthread.so.0(+0x81cf) [0x7f1146e521cf]", "clone()" ], "ceph_version": "16.2.7-126.el8cp", "crash_id": "2023-07-26T06:01:19.880827Z_c6454cce-07f1-4287-89b4-58563f8485e1", "entity_name": "mds.ocs-storagecluster-cephfilesystem-b", "os_id": "rhel", "os_name": "Red Hat Enterprise Linux", "os_version": "8.6 (Ootpa)", "os_version_id": "8.6", "process_name": "ceph-mds", "stack_sig": "30145fcf84b1c324cd2f8813c42d7125d6f8b94a0bf2d700a9210b0563c131d6", "timestamp": 
"2023-07-26T06:01:19.880827Z", "utsname_hostname": "rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-5fcd98ff75f2j", "utsname_machine": "x86_64", "utsname_release": "4.18.0-305.88.1.el8_4.x86_64", "utsname_sysname": "Linux", "utsname_version": "#1 SMP Thu Apr 6 10:22:46 EDT 2023" }
The MDS aborted at line 1623 of the code excerpt below. session->take_ino() may return 0 if the used_preallocated_ino doesn't exist. Erasing ino 0 from session->info.prealloc_inos then triggers the assertion failure in interval_set::erase().
... 1604 if (sessionmapv) { 1605 if (mds->sessionmap.get_version() >= sessionmapv || 1606 unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) { 1607 dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv 1608 << " <= table " << mds->sessionmap.get_version() << dendl; 1609 if (used_preallocated_ino) 1610 mds->mdcache->insert_taken_inos(used_preallocated_ino); 1611 } else { 1612 dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv 1613 << ", table " << mds->sessionmap.get_version() 1614 << " prealloc " << preallocated_inos 1615 << " used " << used_preallocated_ino 1616 << dendl; 1617 Session *session = mds->sessionmap.get_session(client_name); 1618 if (session) { 1619 dout(20) << " (session prealloc " << session->info.prealloc_inos << ")" << dendl; 1620 if (used_preallocated_ino) { 1621 if (!session->info.prealloc_inos.empty()) { 1622 inodeno_t ino = session->take_ino(used_preallocated_ino); 1623 session->info.prealloc_inos.erase(ino); 1624 ceph_assert(ino == used_preallocated_ino); 1625 } 1626 mds->sessionmap.replay_dirty_session(session); 1627 } 1628 if (!preallocated_inos.empty()) { 1629 session->free_prealloc_inos.insert(preallocated_inos); 1630 session->info.prealloc_inos.insert(preallocated_inos); 1631 mds->sessionmap.replay_dirty_session(session); 1632 } 1633 } else { ...
I am still checking why this could happen.
Actions