Bug #14115
crypto: race in nss init
0%
Description
From running the script from #14089 in a loop:
(gdb) bt #0 0x00000031a7635c59 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 #1 0x00000031a7637368 in __GI_abort () at abort.c:89 #2 0x00000031aa260dd5 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6 #3 0x00000031aa25ed46 in ?? () from /lib64/libstdc++.so.6 #4 0x00000031aa25ed73 in std::terminate() () from /lib64/libstdc++.so.6 #5 0x00000031aa25ef93 in __cxa_throw () from /lib64/libstdc++.so.6 #6 0x00007fb454fe0c0f in ceph::__ceph_assert_fail (assertion=assertion@entry=0x7fb45527f6b7 "crypto_context != __null", file=file@entry=0x7fb45527f6a1 "common/ceph_crypto.cc", line=line@entry=73, func=func@entry=0x7fb45527f700 <ceph::crypto::init(CephContext*)::__PRETTY_FUNCTION__> "void ceph::crypto::init(CephContext*)") at common/assert.cc:77 #7 0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73 #8 0x00007fb454ff9679 in CephContext::init_crypto (this=this@entry=0x7fb34400b720) at common/ceph_context.cc:538 #9 0x00007fb454ff69f9 in common_init_finish (cct=0x7fb34400b720) at common/common_init.cc:118 #10 0x00007fb454f2850d in librados::RadosClient::connect (this=this@entry=0x7fb3440008c0) at librados/RadosClient.cc:229 #11 0x00007fb454efb90c in rados_connect (cluster=0x7fb3440008c0) at librados/librados.cc:2375 #12 0x00000031ab205d8c in ffi_call_unix64 ()
From running the script from #14089 in a loop:
(gdb) bt #0 0x00000031a7635c59 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 #1 0x00000031a7637368 in __GI_abort () at abort.c:89 #2 0x00000031aa260dd5 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6 #3 0x00000031aa25ed46 in ?? () from /lib64/libstdc++.so.6 #4 0x00000031aa25ed73 in std::terminate() () from /lib64/libstdc++.so.6 #5 0x00000031aa25ef93 in __cxa_throw () from /lib64/libstdc++.so.6 #6 0x00007fb454fe0c0f in ceph::__ceph_assert_fail (assertion=assertion@entry=0x7fb45527f6b7 "crypto_context != __null", file=file@entry=0x7fb45527f6a1 "common/ceph_crypto.cc", line=line@entry=73, func=func@entry=0x7fb45527f700 <ceph::crypto::init(CephContext*)::__PRETTY_FUNCTION__> "void ceph::crypto::init(CephContext*)") at common/assert.cc:77 #7 0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73 #8 0x00007fb454ff9679 in CephContext::init_crypto (this=this@entry=0x7fb34400b720) at common/ceph_context.cc:538 #9 0x00007fb454ff69f9 in common_init_finish (cct=0x7fb34400b720) at common/common_init.cc:118 #10 0x00007fb454f2850d in librados::RadosClient::connect (this=this@entry=0x7fb3440008c0) at librados/RadosClient.cc:229 #11 0x00007fb454efb90c in rados_connect (cluster=0x7fb3440008c0) at librados/librados.cc:2375 #12 0x00000031ab205d8c in ffi_call_unix64 () from /lib64/libffi.so.6 #13 0x00000031ab2056bc in ffi_call () from /lib64/libffi.so.6 #14 0x00007fb499973c8b in _ctypes_callproc () from /usr/lib64/python2.7/lib-dynload/_ctypes.so #15 0x00007fb49996da85 in PyCFuncPtr_call () from /usr/lib64/python2.7/lib-dynload/_ctypes.so #16 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #17 0x00000037ec8dd417 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #18 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #19 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #20 0x00000037ec8e21dd in 
PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0 #21 0x00000037ec86f0d8 in ?? () from /lib64/libpython2.7.so.1.0 #22 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #23 0x00000037ec8590c5 in ?? () from /lib64/libpython2.7.so.1.0 #24 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #25 0x00000037ec8dc027 in PyEval_CallObjectWithKeywords () from /lib64/libpython2.7.so.1.0 #26 0x00000037ec910822 in ?? () from /lib64/libpython2.7.so.1.0 #27 0x00000031a7a07f33 in start_thread (arg=0x7fb382ffd700) at pthread_create.c:309 #28 0x00000031a76f4ead in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 (gdb) f 7 #7 0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73 73 assert(crypto_context != NULL); (gdb) list 68 } 69 crypto_context = NSS_InitContext(cct->_conf->nss_db_path.c_str(), "", "", 70 SECMOD_DB, &init_params, flags); 71 } 72 pthread_mutex_unlock(&crypto_init_mutex); 73 assert(crypto_context != NULL); 74 } 75 76 void ceph::crypto::shutdown() 77 { (gdb) p crypto_refs $1 = 3
from /lib64/libffi.so.6
#13 0x00000031ab2056bc in ffi_call () from /lib64/libffi.so.6 #14 0x00007fb499973c8b in _ctypes_callproc () from /usr/lib64/python2.7/lib-dynload/_ctypes.so #15 0x00007fb49996da85 in PyCFuncPtr_call () from /usr/lib64/python2.7/lib-dynload/_ctypes.so #16 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #17 0x00000037ec8dd417 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #18 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #19 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0 #20 0x00000037ec8e21dd in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0 #21 0x00000037ec86f0d8 in ?? () from /lib64/libpython2.7.so.1.0 #22 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #23 0x00000037ec8590c5 in ?? () from /lib64/libpython2.7.so.1.0 #24 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0 #25 0x00000037ec8dc027 in PyEval_CallObjectWithKeywords () from /lib64/libpython2.7.so.1.0 #26 0x00000037ec910822 in ?? () from /lib64/libpython2.7.so.1.0 #27 0x00000031a7a07f33 in start_thread (arg=0x7fb382ffd700) at pthread_create.c:309 #28 0x00000031a76f4ead in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 (gdb) f 7 #7 0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73 73 assert(crypto_context != NULL); (gdb) list 68 } 69 crypto_context = NSS_InitContext(cct->_conf->nss_db_path.c_str(), "", "", 70 SECMOD_DB, &init_params, flags); 71 } 72 pthread_mutex_unlock(&crypto_init_mutex); 73 assert(crypto_context != NULL); 74 } 75 76 void ceph::crypto::shutdown() 77 { (gdb) p crypto_refs $1 = 3
Related issues
History
#1 Updated by Kefu Chai about 8 years ago
- Description updated (diff)
#2 Updated by Kefu Chai about 8 years ago
- Description updated (diff)
#3 Updated by Enno Gröper over 7 years ago
Any news on this? I think I'm running into this, too:
common/ceph_crypto.cc: In function 'void ceph::crypto::init(CephContext*)' thread 7f8d9cd208c0 time 2016-07-14 12:17:44.515176
common/ceph_crypto.cc: 73: FAILED assert(crypto_context != __null)
ceph version 0.94.6 (e832001feaf8c176593e0325c8298e3f16dfb403)
1: ./seafile/lib/librados.so.2() [0x3a16f7d910]
2: ./seafile/lib/librados.so.2() [0x3a16f4ae89]
3: ./seafile/lib/librados.so.2() [0x3a16f47ad8]
4: ./seafile/lib/librados.so.2() [0x3a16e837c8]
5: (rados_connect()+0x24) [0x3a16e571d4]
Anything I can do to help?
#4 Updated by Enno Gröper over 7 years ago
In my case the problem was related to incompatible libraries or such:
https://github.com/haiwen/seafile/issues/1720
When replacing the libraries bundled with the software with the OS-supplied ones, everything went fine.
#5 Updated by Sage Weil over 7 years ago
- Status changed from New to 12
- Priority changed from Normal to Urgent
/a/sage-2016-10-14_02:29:27-rados:singleton-wip-sage-testing---basic-smithi/473114
hitting this on the rocksdb librados test...
#6 Updated by Josh Durgin over 7 years ago
seeing this more often now, in 1/3 of 3 jobs: http://qa-proxy.ceph.com/teuthology/joshd-2016-11-02_00:23:24-rados-wip-17654---basic-mira/510008/teuthology.log
#7 Updated by Sage Weil over 7 years ago
/a/sage-2016-11-30_17:15:54-rados-wip-sage-testing---basic-smithi/590025
#8 Updated by Sage Weil over 7 years ago
Ah, I just discovered something. I was hitting this reliably and it was because I was leaking some objects, which presumably included some nss objects. The NSS_ShutdownContext was returning an error, and the next time we tried to initialize we'd get a NULL instead of a new context.
We should probably assert that shutdown succeeds. Or at least warn to stderr if it doesn't.
#9 Updated by Sage Weil over 7 years ago
#10 Updated by Nathan Cutler about 7 years ago
- Backport set to jewel
#11 Updated by Nathan Cutler about 7 years ago
- Backport changed from jewel to jewel,kraken
#12 Updated by Yuri Weinstein about 7 years ago
- Related to Bug #18478: "FAILED assert(crypto_context != __null)" in rados-kraken-distro-basic-smithi added
#13 Updated by Josh Durgin almost 7 years ago
Seems harder to hit in our test environment now, but I did see this in one recent run.
#14 Updated by Sage Weil almost 7 years ago
- Priority changed from Urgent to High
#15 Updated by Wyllys Ingersoll over 6 years ago
Still seeing this in Jewel 10.2.7, Ubuntu 16.04.2 running an application using ceph under Apache:
common/ceph_crypto.cc: In function 'void ceph::crypto::init(CephContext*)' thread 7f834e145700 time 2017-08-23 16:39:28.571316 common/ceph_crypto.cc: 77: FAILED assert(crypto_context != __null) ceph version 10.2.7 (50e863e0f4bc8f4b9e31156de690d765af245185) 1: (()+0x170f70) [0x7f8326a0bf70] 2: (()+0x1c0164) [0x7f8326a5b164] 3: (()+0x18a789) [0x7f8326a25789] 4: (()+0x187ab0) [0x7f8326a22ab0] 5: (()+0x8f1e0) [0x7f832692a1e0] 6: (rados_connect()+0x1b) [0x7f83268fb87b] 7: (()+0x5f6dc) [0x7f8334ceb6dc] 8: (PyEval_EvalFrameEx()+0x6f55) [0x7f83500cfc55] 9: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 10: (PyEval_EvalFrameEx()+0x6ffd) [0x7f83500cfcfd] 11: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 12: (PyEval_EvalFrameEx()+0x6ffd) [0x7f83500cfcfd] 13: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 14: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 15: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 16: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 17: (()+0x13e3dd) [0x7f83501503dd] 18: (PyObject_Call()+0x43) [0x7f83501231e3] 19: (()+0x18531c) [0x7f835019731c] 20: (PyObject_Call()+0x43) [0x7f83501231e3] 21: (()+0x1320bd) [0x7f83501440bd] 22: (()+0xc816f) [0x7f83500da16f] 23: (PyObject_Call()+0x43) [0x7f83501231e3] 24: (PyEval_EvalFrameEx()+0x543c) [0x7f83500ce13c] 25: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 26: (()+0x13e3dd) [0x7f83501503dd] 27: (PyObject_Call()+0x43) [0x7f83501231e3] 28: (()+0x18531c) [0x7f835019731c] 29: (PyObject_Call()+0x43) [0x7f83501231e3] 30: (()+0x1320bd) [0x7f83501440bd] 31: (()+0xc816f) [0x7f83500da16f] 32: (PyObject_Call()+0x43) [0x7f83501231e3] 33: (PyEval_EvalFrameEx()+0x543c) [0x7f83500ce13c] 34: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 35: (()+0x13e3dd) [0x7f83501503dd] 36: (PyObject_Call()+0x43) [0x7f83501231e3] 37: (()+0x18531c) [0x7f835019731c] 38: (PyObject_Call()+0x43) [0x7f83501231e3] 39: (()+0x1320bd) [0x7f83501440bd] 40: (()+0xc816f) [0x7f83500da16f] 41: (PyObject_Call()+0x43) [0x7f83501231e3] 42: (PyEval_EvalFrameEx()+0x543c) 
[0x7f83500ce13c] 43: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 44: (()+0x13e3dd) [0x7f83501503dd] 45: (PyObject_Call()+0x43) [0x7f83501231e3] 46: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c] 47: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 48: (()+0x13e3dd) [0x7f83501503dd] 49: (PyObject_Call()+0x43) [0x7f83501231e3] 50: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c] 51: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 52: (()+0x13e3dd) [0x7f83501503dd] 53: (PyObject_Call()+0x43) [0x7f83501231e3] 54: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c] 55: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 56: (()+0x13e3dd) [0x7f83501503dd] 57: (PyObject_Call()+0x43) [0x7f83501231e3] 58: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c] 59: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 60: (()+0x13e3dd) [0x7f83501503dd] 61: (PyObject_Call()+0x43) [0x7f83501231e3] 62: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c] 63: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 64: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 65: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24] 66: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c] 67: (()+0x13e2e0) [0x7f83501502e0] 68: (PyObject_Call()+0x43) [0x7f83501231e3] 69: (()+0x18531c) [0x7f835019731c] 70: (PyObject_Call()+0x43) [0x7f83501231e3] 71: (()+0x132465) [0x7f8350144465] 72: (PyObject_Call()+0x43) [0x7f83501231e3] 73: (PyEval_CallObjectWithKeywords()+0x47) [0x7f83501f9447] 74: (()+0x1b432) [0x7f83505bb432] 75: (()+0x1e14a) [0x7f83505be14a] 76: (()+0x76ba) [0x7f8354bae6ba] 77: (clone()+0x6d) [0x7f83548e482d]
#16 Updated by Patrick Donnelly over 4 years ago
- Status changed from 12 to New
#17 Updated by Sage Weil almost 3 years ago
- Project changed from Ceph to RADOS
#18 Updated by Neha Ojha about 2 years ago
- Status changed from New to Can't reproduce