Bug #37997
Updated by Ernesto Puerta over 5 years ago
Built from master (a969c9d4061713a8614b64e27fde560e1cfb2d08), running a containerized RHEL7 default vstart cluster. It has happened several times while restarting ceph-mgr or while disabling/enabling ceph-mgr modules (dashboard). It looks like a race condition, possibly a deadlock between several threads accessing config options. The potentially troublesome code seems to be "this recently merged code dealing with module options":https://github.com/ceph/ceph/pull/25651/files#diff-dfd7f5b3fd9db9edc5da542ce8ed2a0dR127 I'll keep an eye out to see whether I can reproduce it reliably. Trace dump (mgr.x.log): <pre> ceph version Development (no_version) nautilus (dev) 1: (Mutex::_will_lock()+0x4c) [0x7fa7d5971ef2] 2: (Mutex::lock(bool)+0x51) [0x7fa7d5971cfb] 3: (std::lock_guard<Mutex>::lock_guard(Mutex&)+0x2f) [0x55b9a26f552f] 4: (PyModuleRegistry::module_exists(std::string const&) const+0x33) [0x55b9a2706d75] 5: (ActivePyModules::get_typed_config(std::string const&, std::string const&) const+0x50) [0x55b9a26fc3b2] 6: (()+0x68bb59) [0x55b9a271cb59] 7: (()+0x68be3c) [0x55b9a271ce3c] 8: (PyEval_EvalFrameEx()+0x6df0) [0x7fa7d4582cf0] 9: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 10: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 11: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 12: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 13: (()+0x70978) [0x7fa7d450e978] 14: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 15: (PyEval_EvalFrameEx()+0x17fd) [0x7fa7d457d6fd] 16: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 17: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 18: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 19: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 20: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 21: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 22: (()+0x70978) [0x7fa7d450e978] 23: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 24: (()+0x5aa55) [0x7fa7d44f8a55] 25: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 26: 
(()+0xa2e27) [0x7fa7d4540e27] 27: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 28: (PyEval_EvalFrameEx()+0x2336) [0x7fa7d457e236] 29: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 30: (()+0x70978) [0x7fa7d450e978] 31: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 32: (()+0x5aa55) [0x7fa7d44f8a55] 33: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 34: (()+0xa2a87) [0x7fa7d4540a87] 35: (()+0xa179f) [0x7fa7d453f79f] 36: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 37: (PyEval_CallObjectWithKeywords()+0x47) [0x7fa7d457b8f7] 38: (ActivePyModule::load(ActivePyModules*)+0x127) [0x55b9a26f2c13] 39: (ActivePyModules::start_one(std::shared_ptr<PyModule>)+0x155) [0x55b9a26faaff] 40: (PyModuleRegistry::active_start(DaemonStateIndex&, ClusterState&, std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const&, MonClient&, std::shared_ptr<LogChannel>, std::shared_ptr<LogChannel>, Objecter&, Client&, Finisher&, DaemonServer&)+0x4ee) [0x55b9a282deb0] 41: (Mgr::init()+0xcc4) [0x55b9a27dd8c6] 42: (()+0x74a9d6) [0x55b9a27db9d6] 43: (()+0x751b54) [0x55b9a27e2b54] 44: (boost::function1<void, int>::operator()(int) const+0x6c) [0x55b9a270789c] 45: (FunctionContext::finish(int)+0x24) [0x55b9a2704624] 46: (Context::complete(int)+0x27) [0x55b9a27044eb] 47: (Finisher::finisher_thread_entry()+0x38b) [0x7fa7d5918cf9] 48: (Finisher::FinisherThread::entry()+0x1c) [0x55b9a27e39a4] 49: (Thread::entry_wrapper()+0x78) [0x7fa7d59859dc] 50: (Thread::_entry_func(void*)+0x18) [0x7fa7d598595a] 51: (()+0x7dd5) [0x7fa7d216ddd5] 52: (clone()+0x6d) [0x7fa7d0e1dead] -1> 2019-01-21 20:01:02.051 7fa7b697c700 -1 /ceph/src/common/lockdep.cc: In function 'int lockdep_will_lock(const char*, int, bool, bool)' thread 7fa7b697c700 time 2019-01-21 20:01:02.041972 /ceph/src/common/lockdep.cc: 305: abort() ceph version Development (no_version) nautilus (dev) 1: (ceph::__ceph_abort(char const*, int, char const*, std::string const&)+0xfe) [0x7fa7d59e54c8] 2: (lockdep_will_lock(char 
const*, int, bool, bool)+0x423) [0x7fa7d5af9d61] 3: (Mutex::_will_lock()+0x4c) [0x7fa7d5971ef2] 4: (Mutex::lock(bool)+0x51) [0x7fa7d5971cfb] 5: (std::lock_guard<Mutex>::lock_guard(Mutex&)+0x2f) [0x55b9a26f552f] 6: (PyModuleRegistry::module_exists(std::string const&) const+0x33) [0x55b9a2706d75] 7: (ActivePyModules::get_typed_config(std::string const&, std::string const&) const+0x50) [0x55b9a26fc3b2] 8: (()+0x68bb59) [0x55b9a271cb59] 9: (()+0x68be3c) [0x55b9a271ce3c] 10: (PyEval_EvalFrameEx()+0x6df0) [0x7fa7d4582cf0] 11: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 12: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 13: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 14: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 15: (()+0x70978) [0x7fa7d450e978] 16: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 17: (PyEval_EvalFrameEx()+0x17fd) [0x7fa7d457d6fd] 18: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 19: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 20: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 21: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 22: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 23: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 24: (()+0x70978) [0x7fa7d450e978] 25: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 26: (()+0x5aa55) [0x7fa7d44f8a55] 27: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 28: (()+0xa2e27) [0x7fa7d4540e27] 29: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 30: (PyEval_EvalFrameEx()+0x2336) [0x7fa7d457e236] 31: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 32: (()+0x70978) [0x7fa7d450e978] 33: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 34: (()+0x5aa55) [0x7fa7d44f8a55] 35: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 36: (()+0xa2a87) [0x7fa7d4540a87] 37: (()+0xa179f) [0x7fa7d453f79f] 38: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 39: (PyEval_CallObjectWithKeywords()+0x47) [0x7fa7d457b8f7] 40: (ActivePyModule::load(ActivePyModules*)+0x127) [0x55b9a26f2c13] 41: (ActivePyModules::start_one(std::shared_ptr<PyModule>)+0x155) [0x55b9a26faaff] 42: 
(PyModuleRegistry::active_start(DaemonStateIndex&, ClusterState&, std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const&, MonClient&, std::shared_ptr<LogChannel>, std::shared_ptr<LogChannel>, Objecter&, Client&, Finisher&, DaemonServer&)+0x4ee) [0x55b9a282deb0] 43: (Mgr::init()+0xcc4) [0x55b9a27dd8c6] 44: (()+0x74a9d6) [0x55b9a27db9d6] 45: (()+0x751b54) [0x55b9a27e2b54] 46: (boost::function1<void, int>::operator()(int) const+0x6c) [0x55b9a270789c] 47: (FunctionContext::finish(int)+0x24) [0x55b9a2704624] 48: (Context::complete(int)+0x27) [0x55b9a27044eb] 49: (Finisher::finisher_thread_entry()+0x38b) [0x7fa7d5918cf9] 50: (Finisher::FinisherThread::entry()+0x1c) [0x55b9a27e39a4] 51: (Thread::entry_wrapper()+0x78) [0x7fa7d59859dc] 52: (Thread::_entry_func(void*)+0x18) [0x7fa7d598595a] 53: (()+0x7dd5) [0x7fa7d216ddd5] 54: (clone()+0x6d) [0x7fa7d0e1dead] 0> 2019-01-21 20:01:02.063 7fa7b697c700 -1 *** Caught signal (Aborted) ** in thread 7fa7b697c700 thread_name:mgr-fin ceph version Development (no_version) nautilus (dev) 1: (()+0x93c7a0) [0x55b9a29cd7a0] 2: (()+0xf5d0) [0x7fa7d21755d0] 3: (gsignal()+0x37) [0x7fa7d0d56207] 4: (abort()+0x148) [0x7fa7d0d578f8] 5: (ceph::__ceph_abort(char const*, int, char const*, std::string const&)+0x377) [0x7fa7d59e5741] 6: (lockdep_will_lock(char const*, int, bool, bool)+0x423) [0x7fa7d5af9d61] 7: (Mutex::_will_lock()+0x4c) [0x7fa7d5971ef2] 8: (Mutex::lock(bool)+0x51) [0x7fa7d5971cfb] 9: (std::lock_guard<Mutex>::lock_guard(Mutex&)+0x2f) [0x55b9a26f552f] 10: (PyModuleRegistry::module_exists(std::string const&) const+0x33) [0x55b9a2706d75] 11: (ActivePyModules::get_typed_config(std::string const&, std::string const&) const+0x50) [0x55b9a26fc3b2] 12: (()+0x68bb59) [0x55b9a271cb59] 13: (()+0x68be3c) [0x55b9a271ce3c] 14: (PyEval_EvalFrameEx()+0x6df0) [0x7fa7d4582cf0] 15: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 16: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 17: 
(PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 18: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 19: (()+0x70978) [0x7fa7d450e978] 20: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 21: (PyEval_EvalFrameEx()+0x17fd) [0x7fa7d457d6fd] 22: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 23: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 24: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 25: (PyEval_EvalFrameEx()+0x663c) [0x7fa7d458253c] 26: (PyEval_EvalFrameEx()+0x67bd) [0x7fa7d45826bd] 27: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 28: (()+0x70978) [0x7fa7d450e978] 29: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 30: (()+0x5aa55) [0x7fa7d44f8a55] 31: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 32: (()+0xa2e27) [0x7fa7d4540e27] 33: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 34: (PyEval_EvalFrameEx()+0x2336) [0x7fa7d457e236] 35: (PyEval_EvalCodeEx()+0x7ed) [0x7fa7d458503d] 36: (()+0x70978) [0x7fa7d450e978] 37: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 38: (()+0x5aa55) [0x7fa7d44f8a55] 39: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 40: (()+0xa2a87) [0x7fa7d4540a87] 41: (()+0xa179f) [0x7fa7d453f79f] 42: (PyObject_Call()+0x43) [0x7fa7d44e9a63] 43: (PyEval_CallObjectWithKeywords()+0x47) [0x7fa7d457b8f7] 44: (ActivePyModule::load(ActivePyModules*)+0x127) [0x55b9a26f2c13] 45: (ActivePyModules::start_one(std::shared_ptr<PyModule>)+0x155) [0x55b9a26faaff] 46: (PyModuleRegistry::active_start(DaemonStateIndex&, ClusterState&, std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const&, MonClient&, std::shared_ptr<LogChannel>, std::shared_ptr<LogChannel>, Objecter&, Client&, Finisher&, DaemonServer&)+0x4ee) [0x55b9a282deb0] 47: (Mgr::init()+0xcc4) [0x55b9a27dd8c6] 48: (()+0x74a9d6) [0x55b9a27db9d6] 49: (()+0x751b54) [0x55b9a27e2b54] 50: (boost::function1<void, int>::operator()(int) const+0x6c) [0x55b9a270789c] 51: (FunctionContext::finish(int)+0x24) [0x55b9a2704624] 52: (Context::complete(int)+0x27) [0x55b9a27044eb] 53: 
(Finisher::finisher_thread_entry()+0x38b) [0x7fa7d5918cf9] 54: (Finisher::FinisherThread::entry()+0x1c) [0x55b9a27e39a4] 55: (Thread::entry_wrapper()+0x78) [0x7fa7d59859dc] 56: (Thread::_entry_func(void*)+0x18) [0x7fa7d598595a] 57: (()+0x7dd5) [0x7fa7d216ddd5] 58: (clone()+0x6d) [0x7fa7d0e1dead] </pre>