As shown in the following stack trace, the OSD process crashed while the main
thread was trying to umount the filestore after upgrading it.
FileStore::umount() involves destroying the LevelDBStore, which destroys
leveldb::FilterPolicy, leveldb::Cache, and leveldb::DB, in that order. However,
leveldb::DB holds references to leveldb::FilterPolicy and leveldb::Cache.
leveldb::DB schedules background threads to perform compaction upon
initialization. If leveldb::FilterPolicy or leveldb::Cache is destroyed in the
main thread before the leveldb compaction thread finishes, the compaction thread
may still use the destroyed objects, resulting in a "Segmentation fault".
The fix is to ensure that LevelDBStore::db is destroyed before LevelDBStore::db_cache and LevelDBStore::filterpolicy, on which it depends.
==================================================================================
-bash-4.1$ sudo gdb /usr/bin/ceph-osd ceph-osd.core.0
(gdb) thread apply all bt
Thread 7 (Thread 0x7f0ad4f3c700 (LWP 97724)):
#0 0x0000003eba20d811 in sem_timedwait () from /lib64/libpthread.so.0
#1 0x0000000000841280 in CephContextServiceThread::entry (this=0x2ef1d40) at
common/ceph_context.cc:58
#2 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#3 0x0000003eb9ee890d in clone () from /lib64/libc.so.6
Thread 6 (Thread 0x7f0ad453b700 (LWP 97725)):
#0 0x0000003eb9edf253 in poll () from /lib64/libc.so.6
#1 0x000000000093c05a in AdminSocket::entry (this=0x2f68280) at
common/admin_socket.cc:230
#2 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#3 0x0000003eb9ee890d in clone () from /lib64/libc.so.6
Thread 5 (Thread 0x7f0ad5e377a0 (LWP 97716)):
#0 0x0000003eba20b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x0000003de3c33b2d in leveldb::port::CondVar::Wait() () from
/usr/lib64/libleveldb.so.1
#2 0x0000003de3c196c8 in leveldb::DBImpl::~DBImpl() () from
/usr/lib64/libleveldb.so.1
#3 0x0000003de3c19a19 in leveldb::DBImpl::~DBImpl() () from
/usr/lib64/libleveldb.so.1
#4 0x00000000007f389d in checked_delete<leveldb::DB> (this=0x2f68500,
_in_chrg=<value optimized out>)
at /usr/include/boost/checked_delete.hpp:34
#5 ~scoped_ptr (this=0x2f68500, __in_chrg=<value optimized out>) at
/usr/include/boost/smart_ptr/scoped_ptr.hpp:80
#6 LevelDBStore::~LevelDBStore (this=0x2f68500, __in_chrg=<value optimized
out>) at os/LevelDBStore.cc:78
#7 0x00000000007f3a59 in LevelDBStore::~LevelDBStore (this=0x2f68500,
_in_chrg=<value optimized out>) at os/LevelDBStore.cc:78
#8 0x00000000007f0b06 in checked_delete<KeyValueDB> (this=0x2f714a0,
_in_chrg=<value optimized out>)
at /usr/include/boost/checked_delete.hpp:34
#9 ~scoped_ptr (this=0x2f714a0, __in_chrg=<value optimized out>) at
/usr/include/boost/smart_ptr/scoped_ptr.hpp:80
#10 DBObjectMap::~DBObjectMap (this=0x2f714a0, __in_chrg=<value optimized out>)
at os/DBObjectMap.h:54
#11 0x00000000007f0b79 in DBObjectMap::~DBObjectMap (this=0x2f714a0,
_in_chrg=<value optimized out>) at os/DBObjectMap.h:54
#12 0x00000000007a0dd0 in checked_delete<ObjectMap> (this=0x2ff0000) at
/usr/include/boost/checked_delete.hpp:34
#13 ~scoped_ptr (this=0x2ff0000) at
/usr/include/boost/smart_ptr/scoped_ptr.hpp:80
#14 reset (this=0x2ff0000) at /usr/include/boost/smart_ptr/scoped_ptr.hpp:86
#15 FileStore::umount (this=0x2ff0000) at os/FileStore.cc:1960
#16 0x0000000000676d6a in OSD::do_convertfs (store=0x2ff0000) at osd/OSD.cc:495
#17 0x0000000000676ee7 in OSD::convertfs (dev="/var/lib/ceph/osd/ceph-30",
jdev="/var/lib/ceph/osd/ceph-30/journal")
at osd/OSD.cc:559
#18 0x00000000005af9b2 in main (argc=<value optimized out>, argv=<value
optimized out>) at ceph_osd.cc:420
Thread 4 (Thread 0x7f0ad3b3a700 (LWP 97726)):
#0 0x0000003eba20b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00000000007f7dea in Wait (this=0x2ff0948, next=0x7f0ad3b39d40) at
./common/Cond.h:55
---Type <return> to continue, or q <return> to quit---
#2 WBThrottle::get_next_should_flush (this=0x2ff0948, next=0x7f0ad3b39d40) at
os/WBThrottle.cc:127
#3 0x00000000007f89f1 in WBThrottle::entry (this=0x2ff0948) at
os/WBThrottle.cc:145
#4 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#5 0x0000003eb9ee890d in clone () from /lib64/libc.so.6
Thread 3 (Thread 0x7f0acbe18700 (LWP 97757)):
#0 0x0000003eba20b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x0000000000848e37 in Wait (this=0x2ff06d8) at common/Cond.h:55
#2 SafeTimer::timer_thread (this=0x2ff06d8) at common/Timer.cc:112
#3 0x000000000084b2ad in SafeTimerThread::entry (this=<value optimized out>)
at common/Timer.cc:38
#4 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#5 0x0000003eb9ee890d in clone () from /lib64/libc.so.6
Thread 2 (Thread 0x7f0ad5e36700 (LWP 97717)):
#0 0x0000003eba20b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00000000008430ab in ceph::log::Log::entry (this=0x2f68000) at
log/Log.cc:323
#2 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#3 0x0000003eb9ee890d in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7f0ad3139700 (LWP 97734)):
#0 0x0000003eba20f3cb in raise () from /lib64/libpthread.so.0
#1 0x000000000080d707 in reraise_fatal (signum=6) at
global/signal_handler.cc:59
#2 handle_fatal_signal (signum=6) at global/signal_handler.cc:105
#3 <signal handler called>
#4 0x0000003eb9e328a5 in raise () from /lib64/libc.so.6
#5 0x0000003eb9e34085 in abort () from /lib64/libc.so.6
#6 0x0000003de3c33ab6 in ?? () from /usr/lib64/libleveldb.so.1
#7 0x0000003de3c3b9c2 in ?? () from /usr/lib64/libleveldb.so.1
#8 0x0000003de3c37cc0 in leveldb::Table::BlockReader(void*,
leveldb::ReadOptions const&, leveldb::Slice const&) ()
from /usr/lib64/libleveldb.so.1
#9 0x0000003de3c3a412 in ?? () from /usr/lib64/libleveldb.so.1
#10 0x0000003de3c3a6f8 in ?? () from /usr/lib64/libleveldb.so.1
#11 0x0000003de3c3761a in ?? () from /usr/lib64/libleveldb.so.1
#12 0x0000003de3c1dd5c in
leveldb::DBImpl::DoCompactionWork(leveldb::DBImpl::CompactionState*) () from
/usr/lib64/libleveldb.so.1
#13 0x0000003de3c1e531 in leveldb::DBImpl::BackgroundCompaction() () from
/usr/lib64/libleveldb.so.1
#14 0x0000003de3c1ec50 in leveldb::DBImpl::BackgroundCall() () from
/usr/lib64/libleveldb.so.1
#15 0x0000003de3c3dc6f in ?? () from /usr/lib64/libleveldb.so.1
#16 0x0000003eba207851 in start_thread () from /lib64/libpthread.so.0
#17 0x0000003eb9ee890d in clone () from /lib64/libc.so.6