Bug #2014
librados shutdown race
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
(gdb)
#0 Lock (this=0x0, idx=123207, amt=1) at ./common/Mutex.h:107
#1 Locker (this=0x0, idx=123207, amt=1) at ./common/Mutex.h:135
#2 PerfCounters::inc (this=0x0, idx=123207, amt=1) at common/perf_counters.cc:92
#3 0x00007fe89c86e0f7 in Objecter::handle_osd_op_reply (this=0x67fe80, m=<value optimized out>) at osdc/Objecter.cc:1238
#4 0x00007fe89c83b574 in librados::RadosClient::_dispatch (this=0x67bbf0, m=0x1e147) at librados.cc:1069
#5 0x00007fe89c83b603 in librados::RadosClient::ms_dispatch (this=0x67bbf0, m=0x7fe8214f45c0) at librados.cc:1039
#6 0x00007fe89c93bfaa in ms_deliver_dispatch (this=0x67f000) at msg/Messenger.h:102
#7 SimpleMessenger::dispatch_entry (this=0x67f000) at msg/SimpleMessenger.cc:364
#8 0x00007fe89c84827c in SimpleMessenger::DispatchThread::entry (this=0x67f488) at msg/SimpleMessenger.h:545
#9 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x0) at common/Thread.cc:41
#10 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#11 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#12 0x0000000000000000 in ?? ()
(gdb) thr app all bt

Thread 16 (Thread 21411):
#0 SimpleMessenger::Pipe::fault (this=0x750380, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0x750380) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x750380) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 15 (Thread 21406):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fe89c59b71e in _L_cond_lock_1028 () from /lib/libpthread.so.0
#2 0x00007fe89c59b54b in __pthread_mutex_cond_lock (mutex=0x750458) at ../nptl/pthread_mutex_lock.c:61
#3 0x00007fe89c595b36 in pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:236
#4 0x00007fe89c95c6ad in Wait (this=0x750380) at ./common/Cond.h:48
#5 SimpleMessenger::Pipe::writer (this=0x750380) at msg/SimpleMessenger.cc:1781
#6 0x00007fe89c848345 in SimpleMessenger::Pipe::Writer::entry (this=<value optimized out>) at msg/SimpleMessenger.h:217
#7 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x750458) at common/Thread.cc:41
#8 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#9 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#10 0x0000000000000000 in ?? ()

Thread 14 (Thread 21394):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fe89c593849 in _L_lock_953 () from /lib/libpthread.so.0
#2 0x00007fe89c59366b in __pthread_mutex_lock (mutex=0x67c3f0) at pthread_mutex_lock.c:61
#3 0x00007fe89c92405c in Lock (this=0x67c458) at common/Mutex.h:108
#4 SafeTimer::shutdown (this=0x67c458) at common/Timer.cc:77
#5 0x00007fe89c8286c4 in librados::RadosClient::shutdown (this=0x67bbf0) at librados.cc:1016
#6 0x00007fe89c828942 in librados::Rados::shutdown (this=0x7fff5d5ea750) at librados.cc:3172
#7 0x0000000000409785 in shutdown (argc=<value optimized out>, argv=<value optimized out>) at ./test/osd/RadosModel.h:148
#8 main (argc=<value optimized out>, argv=<value optimized out>) at test/osd/TestRados.cc:250

Thread 13 (Thread 21423):
#0 SimpleMessenger::Pipe::fault (this=0xfce3d0, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0xfce3d0) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0xfce3d0) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 12 (Thread 21396):
#0 0x00007fe89bdc5203 in __poll (fds=<value optimized out>, nfds=<value optimized out>, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:87
#1 0x00007fe89c919df6 in AdminSocket::entry (this=0x67b460) at common/admin_socket.cc:211
#2 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x7fe89a79cdf0) at common/Thread.cc:41
#3 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#4 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5 0x0000000000000000 in ?? ()

Thread 11 (Thread 21405):
#0 SimpleMessenger::Pipe::fault (this=0x6883f0, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0x6883f0) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x6883f0) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 10 (Thread 21424):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fe89c59b71e in _L_cond_lock_1028 () from /lib/libpthread.so.0
#2 0x00007fe89c59b54b in __pthread_mutex_cond_lock (mutex=0x7fe89028e6f8) at ../nptl/pthread_mutex_lock.c:61
#3 0x00007fe89c595b36 in pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:236
#4 0x00007fe89c95c6ad in Wait (this=0x7fe89028e620) at ./common/Cond.h:48
#5 SimpleMessenger::Pipe::writer (this=0x7fe89028e620) at msg/SimpleMessenger.cc:1781
#6 0x00007fe89c848345 in SimpleMessenger::Pipe::Writer::entry (this=<value optimized out>) at msg/SimpleMessenger.h:217
#7 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x7fe89028e6f8) at common/Thread.cc:41
#8 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#9 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#10 0x0000000000000000 in ?? ()

Thread 9 (Thread 21422):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fe89c59b71e in _L_cond_lock_1028 () from /lib/libpthread.so.0
#2 0x00007fe89c59b54b in __pthread_mutex_cond_lock (mutex=0xfce4a8) at ../nptl/pthread_mutex_lock.c:61
#3 0x00007fe89c595b36 in pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:236
#4 0x00007fe89c95c6ad in Wait (this=0xfce3d0) at ./common/Cond.h:48
#5 SimpleMessenger::Pipe::writer (this=0xfce3d0) at msg/SimpleMessenger.cc:1781
#6 0x00007fe89c848345 in SimpleMessenger::Pipe::Writer::entry (this=<value optimized out>) at msg/SimpleMessenger.h:217
#7 0x00007fe89c9b3292 in Thread::_entry_func (arg=0xfce4a8) at common/Thread.cc:41
#8 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#9 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#10 0x0000000000000000 in ?? ()

Thread 8 (Thread 21395):
#0 sem_timedwait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S:103
#1 0x00007fe89c9ab27f in CephContextServiceThread::entry (this=0x67bad0) at common/ceph_context.cc:53
#2 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x67bae8) at common/Thread.cc:41
#3 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#4 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5 0x0000000000000000 in ?? ()

Thread 7 (Thread 21402):
#0 0x00007fe89bdc5203 in __poll (fds=<value optimized out>, nfds=<value optimized out>, timeout=900000) at ../sysdeps/unix/sysv/linux/poll.c:87
#1 0x00007fe89c939469 in tcp_read_wait (sd=<value optimized out>, timeout=<value optimized out>) at msg/tcp.cc:53
#2 0x00007fe89c947d40 in tcp_read (cct=0x672680, sd=7, buf=0x7fe897f97d9f "ÿ\005", len=1, timeout=0) at msg/tcp.cc:26
#3 0x00007fe89c95470d in SimpleMessenger::Pipe::reader (this=0x681090) at msg/SimpleMessenger.cc:1566
#4 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#5 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x7fe897f97cc0) at common/Thread.cc:41
#6 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#7 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#8 0x0000000000000000 in ?? ()

Thread 6 (Thread 21427):
#0 SimpleMessenger::Pipe::fault (this=0x7fe89028e620, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0x7fe89028e620) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x7fe89028e620) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 5 (Thread 21401):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1 0x00007fe89c95c6ad in Wait (this=0x681090) at ./common/Cond.h:48
#2 SimpleMessenger::Pipe::writer (this=0x681090) at msg/SimpleMessenger.cc:1781
#3 0x00007fe89c848345 in SimpleMessenger::Pipe::Writer::entry (this=<value optimized out>) at msg/SimpleMessenger.h:217
#4 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x68126c) at common/Thread.cc:41
#5 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#6 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#7 0x0000000000000000 in ?? ()

Thread 4 (Thread 21414):
#0 SimpleMessenger::Pipe::fault (this=0x75cb80, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0x75cb80) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x75cb80) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 3 (Thread 21398):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1 0x00007fe89c94c848 in Wait (this=0x67f000) at ./common/Cond.h:48
#2 SimpleMessenger::reaper_entry (this=0x67f000) at msg/SimpleMessenger.cc:2286
#3 0x00007fe89c848f2c in SimpleMessenger::ReaperThread::entry (this=0x67f430) at msg/SimpleMessenger.h:496
#4 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x67f45c) at common/Thread.cc:41
#5 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#6 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#7 0x0000000000000000 in ?? ()

Thread 2 (Thread 21410):
#0 SimpleMessenger::Pipe::fault (this=0x694480, onconnect=false, onread=true) at msg/SimpleMessenger.cc:1430
#1 0x00007fe89c9550b6 in SimpleMessenger::Pipe::reader (this=0x694480) at msg/SimpleMessenger.cc:1570
#2 0x00007fe89c848315 in SimpleMessenger::Pipe::Reader::entry (this=<value optimized out>) at msg/SimpleMessenger.h:209
#3 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x694480) at common/Thread.cc:41
#4 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#5 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6 0x0000000000000000 in ?? ()

Thread 1 (Thread 21397):
#0 Lock (this=0x0, idx=123207, amt=1) at ./common/Mutex.h:107
#1 Locker (this=0x0, idx=123207, amt=1) at ./common/Mutex.h:135
#2 PerfCounters::inc (this=0x0, idx=123207, amt=1) at common/perf_counters.cc:92
#3 0x00007fe89c86e0f7 in Objecter::handle_osd_op_reply (this=0x67fe80, m=<value optimized out>) at osdc/Objecter.cc:1238
#4 0x00007fe89c83b574 in librados::RadosClient::_dispatch (this=0x67bbf0, m=0x1e147) at librados.cc:1069
#5 0x00007fe89c83b603 in librados::RadosClient::ms_dispatch (this=0x67bbf0, m=0x7fe8214f45c0) at librados.cc:1039
#6 0x00007fe89c93bfaa in ms_deliver_dispatch (this=0x67f000) at msg/Messenger.h:102
#7 SimpleMessenger::dispatch_entry (this=0x67f000) at msg/SimpleMessenger.cc:364
#8 0x00007fe89c84827c in SimpleMessenger::DispatchThread::entry (this=0x67f488) at msg/SimpleMessenger.h:545
#9 0x00007fe89c9b3292 in Thread::_entry_func (arg=0x0) at common/Thread.cc:41
#10 0x00007fe89c591971 in start_thread (arg=<value optimized out>) at pthread_create.c:304
#11 0x00007fe89bdd192d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#12 0x0000000000000000 in ?? ()
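The objecter was shut down (thread 14 is inside librados::RadosClient::shutdown), but the messenger's dispatch thread (thread 1) is still processing messages: Objecter::handle_osd_op_reply calls PerfCounters::inc with this=0x0 and crashes.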
History
#1 Updated by Sage Weil almost 12 years ago
diff --git a/src/librados.cc b/src/librados.cc
index d9cd8ad..6b3018a 100644
--- a/src/librados.cc
+++ b/src/librados.cc
@@ -1035,8 +1035,14 @@ librados::RadosClient::~RadosClient()
 
 bool librados::RadosClient::ms_dispatch(Message *m)
 {
+  bool ret;
   lock.Lock();
-  bool ret = _dispatch(m);
+  if (state == DISCONNECTED) {
+    m->put();
+    ret = true;
+  } else {
+    ret = _dispatch(m);
+  }
   lock.Unlock();
   return ret;
 }
#2 Updated by Sage Weil almost 12 years ago
- Status changed from New to Resolved
Resolved by commit 33659521a92315f71040551b2699d9961acc07f7 and neighboring commits.