Project

General

Profile

Bug #14115

crypto: race in nss init

Added by Josh Durgin over 8 years ago. Updated about 2 years ago.

Status:
Can't reproduce
Priority:
High
Assignee:
-
Category:
-
Target version:
-
% Done:

0%

Source:
other
Tags:
Backport:
jewel,kraken
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

From running the script from #14089 in a loop:

(gdb) bt
#0  0x00000031a7635c59 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1  0x00000031a7637368 in __GI_abort () at abort.c:89
#2  0x00000031aa260dd5 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6
#3  0x00000031aa25ed46 in ?? () from /lib64/libstdc++.so.6
#4  0x00000031aa25ed73 in std::terminate() () from /lib64/libstdc++.so.6
#5  0x00000031aa25ef93 in __cxa_throw () from /lib64/libstdc++.so.6
#6  0x00007fb454fe0c0f in ceph::__ceph_assert_fail (assertion=assertion@entry=0x7fb45527f6b7 "crypto_context != __null",
    file=file@entry=0x7fb45527f6a1 "common/ceph_crypto.cc", line=line@entry=73,
    func=func@entry=0x7fb45527f700 <ceph::crypto::init(CephContext*)::__PRETTY_FUNCTION__> "void ceph::crypto::init(CephContext*)") at common/assert.cc:77
#7  0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73
#8  0x00007fb454ff9679 in CephContext::init_crypto (this=this@entry=0x7fb34400b720) at common/ceph_context.cc:538
#9  0x00007fb454ff69f9 in common_init_finish (cct=0x7fb34400b720) at common/common_init.cc:118
#10 0x00007fb454f2850d in librados::RadosClient::connect (this=this@entry=0x7fb3440008c0) at librados/RadosClient.cc:229
#11 0x00007fb454efb90c in rados_connect (cluster=0x7fb3440008c0) at librados/librados.cc:2375
#12 0x00000031ab205d8c in ffi_call_unix64 ()

From running the script from #14089 in a loop:
(gdb) bt
#0  0x00000031a7635c59 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1  0x00000031a7637368 in __GI_abort () at abort.c:89
#2  0x00000031aa260dd5 in __gnu_cxx::__verbose_terminate_handler() () from /lib64/libstdc++.so.6
#3  0x00000031aa25ed46 in ?? () from /lib64/libstdc++.so.6
#4  0x00000031aa25ed73 in std::terminate() () from /lib64/libstdc++.so.6
#5  0x00000031aa25ef93 in __cxa_throw () from /lib64/libstdc++.so.6
#6  0x00007fb454fe0c0f in ceph::__ceph_assert_fail (assertion=assertion@entry=0x7fb45527f6b7 "crypto_context != __null",
    file=file@entry=0x7fb45527f6a1 "common/ceph_crypto.cc", line=line@entry=73,
    func=func@entry=0x7fb45527f700 <ceph::crypto::init(CephContext*)::__PRETTY_FUNCTION__> "void ceph::crypto::init(CephContext*)") at common/assert.cc:77
#7  0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73
#8  0x00007fb454ff9679 in CephContext::init_crypto (this=this@entry=0x7fb34400b720) at common/ceph_context.cc:538
#9  0x00007fb454ff69f9 in common_init_finish (cct=0x7fb34400b720) at common/common_init.cc:118
#10 0x00007fb454f2850d in librados::RadosClient::connect (this=this@entry=0x7fb3440008c0) at librados/RadosClient.cc:229
#11 0x00007fb454efb90c in rados_connect (cluster=0x7fb3440008c0) at librados/librados.cc:2375
#12 0x00000031ab205d8c in ffi_call_unix64 () from /lib64/libffi.so.6
#13 0x00000031ab2056bc in ffi_call () from /lib64/libffi.so.6
#14 0x00007fb499973c8b in _ctypes_callproc () from /usr/lib64/python2.7/lib-dynload/_ctypes.so
#15 0x00007fb49996da85 in PyCFuncPtr_call () from /usr/lib64/python2.7/lib-dynload/_ctypes.so
#16 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#17 0x00000037ec8dd417 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#18 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#19 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#20 0x00000037ec8e21dd in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
#21 0x00000037ec86f0d8 in ?? () from /lib64/libpython2.7.so.1.0
#22 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#23 0x00000037ec8590c5 in ?? () from /lib64/libpython2.7.so.1.0
#24 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#25 0x00000037ec8dc027 in PyEval_CallObjectWithKeywords () from /lib64/libpython2.7.so.1.0
#26 0x00000037ec910822 in ?? () from /lib64/libpython2.7.so.1.0
#27 0x00000031a7a07f33 in start_thread (arg=0x7fb382ffd700) at pthread_create.c:309
#28 0x00000031a76f4ead in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) f 7
#7  0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73
73        assert(crypto_context != NULL);
(gdb) list
68          }
69          crypto_context = NSS_InitContext(cct->_conf->nss_db_path.c_str(), "", "",
70                                           SECMOD_DB, &init_params, flags);
71        }
72        pthread_mutex_unlock(&crypto_init_mutex);
73        assert(crypto_context != NULL);
74      }
75
76      void ceph::crypto::shutdown()
77      {
(gdb) p crypto_refs
$1 = 3
from /lib64/libffi.so.6
#13 0x00000031ab2056bc in ffi_call () from /lib64/libffi.so.6
#14 0x00007fb499973c8b in _ctypes_callproc () from /usr/lib64/python2.7/lib-dynload/_ctypes.so
#15 0x00007fb49996da85 in PyCFuncPtr_call () from /usr/lib64/python2.7/lib-dynload/_ctypes.so
#16 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#17 0x00000037ec8dd417 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#18 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#19 0x00000037ec8e0980 in PyEval_EvalFrameEx () from /lib64/libpython2.7.so.1.0
#20 0x00000037ec8e21dd in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
#21 0x00000037ec86f0d8 in ?? () from /lib64/libpython2.7.so.1.0
#22 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#23 0x00000037ec8590c5 in ?? () from /lib64/libpython2.7.so.1.0
#24 0x00000037ec84a0d3 in PyObject_Call () from /lib64/libpython2.7.so.1.0
#25 0x00000037ec8dc027 in PyEval_CallObjectWithKeywords () from /lib64/libpython2.7.so.1.0
#26 0x00000037ec910822 in ?? () from /lib64/libpython2.7.so.1.0
#27 0x00000031a7a07f33 in start_thread (arg=0x7fb382ffd700) at pthread_create.c:309
#28 0x00000031a76f4ead in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) f 7
#7  0x00007fb45502beb0 in ceph::crypto::init (cct=cct@entry=0x7fb34400b720) at common/ceph_crypto.cc:73
73        assert(crypto_context != NULL);
(gdb) list
68          }
69          crypto_context = NSS_InitContext(cct->_conf->nss_db_path.c_str(), "", "",
70                                           SECMOD_DB, &init_params, flags);
71        }
72        pthread_mutex_unlock(&crypto_init_mutex);
73        assert(crypto_context != NULL);
74      }
75
76      void ceph::crypto::shutdown()
77      {
(gdb) p crypto_refs
$1 = 3


Related issues

Related to Ceph - Bug #18478: "FAILED assert(crypto_context != __null)" in rados-kraken-distro-basic-smithi Closed

History

#1 Updated by Kefu Chai about 8 years ago

  • Description updated (diff)

#2 Updated by Kefu Chai about 8 years ago

  • Description updated (diff)

#3 Updated by Enno Gröper over 7 years ago

Any news on this? I think I'm running into this, too:

common/ceph_crypto.cc: In function 'void ceph::crypto::init(CephContext*)' thread 7f8d9cd208c0 time 2016-07-14 12:17:44.515176
common/ceph_crypto.cc: 73: FAILED assert(crypto_context != __null)
ceph version 0.94.6 (e832001feaf8c176593e0325c8298e3f16dfb403)
1: ./seafile/lib/librados.so.2() [0x3a16f7d910]
2: ./seafile/lib/librados.so.2() [0x3a16f4ae89]
3: ./seafile/lib/librados.so.2() [0x3a16f47ad8]
4: ./seafile/lib/librados.so.2() [0x3a16e837c8]
5: (rados_connect()+0x24) [0x3a16e571d4]

Anything I can do to help?

#4 Updated by Enno Gröper over 7 years ago

In my case the problem was related to incompatible libraries or such:
https://github.com/haiwen/seafile/issues/1720

When I replaced the libraries bundled with the software with the OS-supplied ones, everything worked fine.

#5 Updated by Sage Weil over 7 years ago

  • Status changed from New to 12
  • Priority changed from Normal to Urgent

/a/sage-2016-10-14_02:29:27-rados:singleton-wip-sage-testing---basic-smithi/473114

hitting this on the rocksdb librados test...

#7 Updated by Sage Weil over 7 years ago

/a/sage-2016-11-30_17:15:54-rados-wip-sage-testing---basic-smithi/590025

#8 Updated by Sage Weil over 7 years ago

Ah, I just discovered something. I was hitting this reliably and it was because I was leaking some objects, which presumably included some nss objects. The NSS_ShutdownContext was returning an error, and the next time we tried to initialize we'd get a NULL instead of a new context.

We should probably assert that shutdown succeeds, or at least warn to stderr if it doesn't.

#10 Updated by Nathan Cutler about 7 years ago

  • Backport set to jewel

#11 Updated by Nathan Cutler about 7 years ago

  • Backport changed from jewel to jewel,kraken

#12 Updated by Yuri Weinstein about 7 years ago

  • Related to Bug #18478: "FAILED assert(crypto_context != __null)" in rados-kraken-distro-basic-smithi added

#13 Updated by Josh Durgin almost 7 years ago

Seems harder to hit in our test environment now, but I did see this in one recent run.

#14 Updated by Sage Weil almost 7 years ago

  • Priority changed from Urgent to High

#15 Updated by Wyllys Ingersoll over 6 years ago

Still seeing this in Jewel 10.2.7, Ubuntu 16.04.2 running an application using ceph under Apache:

common/ceph_crypto.cc: In function 'void ceph::crypto::init(CephContext*)' thread 7f834e145700 time 2017-08-23 16:39:28.571316
common/ceph_crypto.cc: 77: FAILED assert(crypto_context != __null)
 ceph version 10.2.7 (50e863e0f4bc8f4b9e31156de690d765af245185)
 1: (()+0x170f70) [0x7f8326a0bf70]
 2: (()+0x1c0164) [0x7f8326a5b164]
 3: (()+0x18a789) [0x7f8326a25789]
 4: (()+0x187ab0) [0x7f8326a22ab0]
 5: (()+0x8f1e0) [0x7f832692a1e0]
 6: (rados_connect()+0x1b) [0x7f83268fb87b]
 7: (()+0x5f6dc) [0x7f8334ceb6dc]
 8: (PyEval_EvalFrameEx()+0x6f55) [0x7f83500cfc55]
 9: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 10: (PyEval_EvalFrameEx()+0x6ffd) [0x7f83500cfcfd]
 11: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 12: (PyEval_EvalFrameEx()+0x6ffd) [0x7f83500cfcfd]
 13: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 14: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 15: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 16: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 17: (()+0x13e3dd) [0x7f83501503dd]
 18: (PyObject_Call()+0x43) [0x7f83501231e3]
 19: (()+0x18531c) [0x7f835019731c]
 20: (PyObject_Call()+0x43) [0x7f83501231e3]
 21: (()+0x1320bd) [0x7f83501440bd]
 22: (()+0xc816f) [0x7f83500da16f]
 23: (PyObject_Call()+0x43) [0x7f83501231e3]
 24: (PyEval_EvalFrameEx()+0x543c) [0x7f83500ce13c]
 25: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 26: (()+0x13e3dd) [0x7f83501503dd]
 27: (PyObject_Call()+0x43) [0x7f83501231e3]
 28: (()+0x18531c) [0x7f835019731c]
 29: (PyObject_Call()+0x43) [0x7f83501231e3]
 30: (()+0x1320bd) [0x7f83501440bd]
 31: (()+0xc816f) [0x7f83500da16f]
 32: (PyObject_Call()+0x43) [0x7f83501231e3]
 33: (PyEval_EvalFrameEx()+0x543c) [0x7f83500ce13c]
 34: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 35: (()+0x13e3dd) [0x7f83501503dd]
 36: (PyObject_Call()+0x43) [0x7f83501231e3]
 37: (()+0x18531c) [0x7f835019731c]
 38: (PyObject_Call()+0x43) [0x7f83501231e3]
 39: (()+0x1320bd) [0x7f83501440bd]
 40: (()+0xc816f) [0x7f83500da16f]
 41: (PyObject_Call()+0x43) [0x7f83501231e3]
 42: (PyEval_EvalFrameEx()+0x543c) [0x7f83500ce13c]
 43: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 44: (()+0x13e3dd) [0x7f83501503dd]
 45: (PyObject_Call()+0x43) [0x7f83501231e3]
 46: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c]
 47: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 48: (()+0x13e3dd) [0x7f83501503dd]
 49: (PyObject_Call()+0x43) [0x7f83501231e3]
 50: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c]
 51: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 52: (()+0x13e3dd) [0x7f83501503dd]
 53: (PyObject_Call()+0x43) [0x7f83501231e3]
 54: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c]
 55: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 56: (()+0x13e3dd) [0x7f83501503dd]
 57: (PyObject_Call()+0x43) [0x7f83501231e3]
 58: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c]
 59: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 60: (()+0x13e3dd) [0x7f83501503dd]
 61: (PyObject_Call()+0x43) [0x7f83501231e3]
 62: (PyEval_EvalFrameEx()+0x122c) [0x7f83500c9f2c]
 63: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 64: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 65: (PyEval_EvalFrameEx()+0x7124) [0x7f83500cfe24]
 66: (PyEval_EvalCodeEx()+0x85c) [0x7f83501fa01c]
 67: (()+0x13e2e0) [0x7f83501502e0]
 68: (PyObject_Call()+0x43) [0x7f83501231e3]
 69: (()+0x18531c) [0x7f835019731c]
 70: (PyObject_Call()+0x43) [0x7f83501231e3]
 71: (()+0x132465) [0x7f8350144465]
 72: (PyObject_Call()+0x43) [0x7f83501231e3]
 73: (PyEval_CallObjectWithKeywords()+0x47) [0x7f83501f9447]
 74: (()+0x1b432) [0x7f83505bb432]
 75: (()+0x1e14a) [0x7f83505be14a]
 76: (()+0x76ba) [0x7f8354bae6ba]
 77: (clone()+0x6d) [0x7f83548e482d]

#16 Updated by Patrick Donnelly over 4 years ago

  • Status changed from 12 to New

#17 Updated by Sage Weil almost 3 years ago

  • Project changed from Ceph to RADOS

#18 Updated by Neha Ojha about 2 years ago

  • Status changed from New to Can't reproduce

Also available in: Atom PDF