Actions
Bug #484
closedmsgr: crash on just-closed pipe
% Done:
0%
Source:
Tags:
Backport:
Regression:
Severity:
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
The log shows the pipe 0x7fea08000c30 was just marked down:
10.10.13_16:15:52.778718 7fea0fad3910 -- 10.14.0.110:6800/4556 --> mon1 10.14.0.119:6789/0 -- mon_subscribe({monmap=2+}) v1 -- ?+0 0x7fea080009f0 10.10.13_16:15:52.778733 7fea0fad3910 -- 10.14.0.110:6800/4556 submit_message mon_subscribe({monmap=2+}) v1 remote, 10.14.0.119:6789/0, have pipe. 10.10.13_16:15:52.778751 7fea0fad3910 -- 10.14.0.110:6800/4556 --> mon1 10.14.0.119:6789/0 -- osd_boot(osd3 v4011) v1 -- ?+0 0x7fea08002010 10.10.13_16:15:52.778767 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer: state = 2 policy.server=0 10.10.13_16:15:52.778787 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer encoding 3 0x7fea080009f0 mon_subscribe({monmap=2+}) v1 10.10.13_16:15:52.778806 7fea0fad3910 -- 10.14.0.110:6800/4556 submit_message osd_boot(osd3 v4011) v1 remote, 10.14.0.119:6789/0, have pipe. 10.10.13_16:15:52.778824 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer sending 3 0x7fea080009f0 10.10.13_16:15:52.778843 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).write_message 0x7fea080009f0 10.10.13_16:15:52.778860 7fea0fad3910 cephx: validate_tickets want 37 have 37 need 0 10.10.13_16:15:52.778872 7fea0fad3910 cephx client: need_tickets: want=37 need=0 have=37 10.10.13_16:15:52.778882 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer: state = 2 policy.server=0 10.10.13_16:15:52.778902 7fea0fad3910 monclient: _check_auth_rotating renewing rotating keys (they expired before 10.10.13_16:15:22.778881) 10.10.13_16:15:52.778917 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer encoding 4 0x7fea08002010 osd_boot(osd3 v4011) v1 10.10.13_16:15:52.778942 7fea0fad3910 cephx client: build_rotating_request 10.10.13_16:15:52.778955 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer sending 4 0x7fea08002010 10.10.13_16:15:52.778974 7fea0fad3910 -- 10.14.0.110:6800/4556 --> mon1 10.14.0.119:6789/0 -- auth(proto 2 2 bytes) v1 -- ?+0 0x7fea08004500 10.10.13_16:15:52.778993 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).write_message 0x7fea08002010 10.10.13_16:15:52.779013 7fea0fad3910 -- 10.14.0.110:6800/4556 submit_message auth(proto 2 2 bytes) v1 remote, 10.14.0.119:6789/0, have pipe. 10.10.13_16:15:52.779035 7fea0fad3910 -- 10.14.0.110:6800/4556 dispatch_throttle_release 225 to dispatch throttler 225/104857600 10.10.13_16:15:52.779048 7fea0fad3910 -- 10.14.0.110:6800/4556 done calling dispatch on 0x25fbc60 10.10.13_16:15:52.779060 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer: state = 2 policy.server=0 10.10.13_16:15:52.779092 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer encoding 5 0x7fea08004500 auth(proto 2 2 bytes) v1 10.10.13_16:15:52.779110 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer sending 5 0x7fea08004500 10.10.13_16:15:52.779125 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).write_message 0x7fea08004500 10.10.13_16:15:52.779157 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer: state = 2 policy.server=0 10.10.13_16:15:52.779170 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer sleeping 10.10.13_16:15:52.779198 7fea17bf6710 monclient: wait_auth_rotating waiting (until 10.10.13_16:16:22.451688) 10.10.13_16:15:52.780158 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got ACK 10.10.13_16:15:52.780176 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got ack seq 3 10.10.13_16:15:52.780188 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader reading tag... 10.10.13_16:15:52.780201 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got MSG 10.10.13_16:15:52.780215 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got envelope type=16 src mon1 front=20 data=0 off 0 10.10.13_16:15:52.780229 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader wants 20 from dispatch throttler 0/104857600 10.10.13_16:15:52.780245 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got front 20 10.10.13_16:15:52.780260 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).aborted = 0 10.10.13_16:15:52.780272 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got 20 + 0 + 0 byte message 10.10.13_16:15:52.780291 7fea0e1cf910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).reader got message 3 0x25fbc60 mon_subscribe_ack(300s) v1 10.10.13_16:15:52.780308 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer: state = 2 policy.server=0 10.10.13_16:15:52.780326 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).write_ack 3 10.10.13_16:15:52.780342 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).do_sendmsg error Broken pipe 10.10.13_16:15:52.780355 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).writer couldn't write ack, Broken pipe 10.10.13_16:15:52.780367 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=12 pgs=79601 cs=1 l=1).fault 32: Broken pipe 10.10.13_16:15:52.780385 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).fault on lossy channel, failing 10.10.13_16:15:52.780397 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).fail 10.10.13_16:15:52.780408 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).stop 10.10.13_16:15:52.780419 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).discard_queue 10.10.13_16:15:52.780439 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).writer finishing 10.10.13_16:15:52.780452 7fea0e2d0910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).writer done 10.10.13_16:15:52.780486 7fea0fad3910 monclient: ms_handle_reset current mon 10.14.0.119:6789/0 10.10.13_16:15:52.780496 7fea0fad3910 monclient: hunting for new mon 10.10.13_16:15:52.780502 7fea0fad3910 monclient(hunting): _reopen_session 10.10.13_16:15:52.780509 7fea0fad3910 -- 10.14.0.110:6800/4556 mark_down 10.14.0.119:6789/0 -- 0x7fea08000c30 10.10.13_16:15:52.780520 7fea0fad3910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).unregister_pipe 10.10.13_16:15:52.780533 7fea0fad3910 -- 10.14.0.110:6800/4556 >> 10.14.0.119:6789/0 pipe(0x7fea08000c30 sd=-1 pgs=79601 cs=1 l=1).stop 10.10.13_16:15:52.780545 7fea0fad3910 monclient(hunting): _pick_new_mon picked mon0 10.10.13_16:15:52.780557 7fea0fad3910 -- 10.14.0.110:6800/4556 --> mon0 10.14.0.118:6789/0 -- auth(proto 0 26 bytes) v1 -- ?+0 0x7fea08000f40 10.10.13_16:15:52.780572 7fea0fad3910 -- 10.14.0.110:6800/4556 submit_message auth(proto 0 26 bytes) v1 remote, 10.14.0.118:6789/0, new pipe. 10.10.13_16:15:52.780583 7fea0fad3910 -- 10.14.0.110:6800/4556 connect_rank to 10.14.0.118:6789/0, creating pipe and registering 10.10.13_16:15:52.780605 7fea0fad3910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=-1 pgs=0 cs=0 l=0).register_pipe 10.10.13_16:15:52.780623 7fea0fad3910 monclient(hunting): renew_subs 10.10.13_16:15:52.780637 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=-1 pgs=0 cs=0 l=0).writer: state = 1 policy.server=0 10.10.13_16:15:52.780651 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=-1 pgs=0 cs=0 l=0).connect 0 10.10.13_16:15:52.780666 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connecting to 10.14.0.118:6789/0 10.10.13_16:15:52.783405 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect read peer addr 10.14.0.118:6789/0 on socket 12 10.10.13_16:15:52.783457 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect peer addr for me is 10.14.0.110:52781/0 10.10.13_16:15:52.783489 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect sent my addr 10.14.0.110:6800/4556 10.10.13_16:15:52.783506 7fea17b33910 osd3 4011 OSD::ms_get_authorizer type=mon 10.10.13_16:15:52.783514 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect sending gseq=94 cseq=0 proto=15 10.10.13_16:15:52.783535 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect wrote (self +) cseq, waiting for reply 10.10.13_16:15:52.783634 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=0 cs=0 l=0).connect got reply tag 1 connect_seq 1 global_seq 33466 proto 15 flags 1 10.10.13_16:15:52.783651 7fea17b33910 -- 10.14.0.110:6800/4556 >> 10.14.0.118:6789/0 pipe(0x7fea080044b0 sd=12 pgs=33466 cs=1 l=1).connect success 1, lossy = 1, features 19
but then we crash with this
(gdb) bt #0 0x00007fea1675df45 in raise () from /lib/libc.so.6 #1 0x00007fea16760d80 in abort () from /lib/libc.so.6 #2 0x00007fea1679354d in ?? () from /lib/libc.so.6 #3 0x00007fea1679cd16 in ?? () from /lib/libc.so.6 #4 0x00007fea167a19bc in free () from /lib/libc.so.6 #5 0x000000000066195e in __gnu_cxx::new_allocator<std::_List_node<Message*> >::deallocate (this=0x7fea08004b88, __p=0x7fea08004b88) at /usr/include/c++/4.4/ext/new_allocator.h:95 #6 0x000000000065fdba in std::_List_base<Message*, std::allocator<Message*> >::_M_put_node (this=0x7fea08004b88, __p=0x7fea08004b88) at /usr/include/c++/4.4/bits/stl_list.h:320 #7 0x00000000006772e2 in std::list<Message*, std::allocator<Message*> >::_M_erase (this=0x7fea08004b88, __position=...) at /usr/include/c++/4.4/bits/stl_list.h:1431 #8 0x00000000006763e2 in std::list<Message*, std::allocator<Message*> >::pop_front (this=0x7fea08004b88) at /usr/include/c++/4.4/bits/stl_list.h:906 #9 0x00000000006662b9 in SimpleMessenger::dispatch_entry (this=0x25d6ef0) at msg/SimpleMessenger.cc:290 #10 0x000000000065bb40 in SimpleMessenger::DispatchThread::entry (this=0x25d7378) at msg/SimpleMessenger.h:536 #11 0x00000000006733e3 in Thread::_entry_func (arg=0x25d7378) at ./common/Thread.h:39 #12 0x00007fea175ce73a in start_thread () from /lib/libpthread.so.0 #13 0x00007fea167f769d in clone () from /lib/libc.so.6 #14 0x0000000000000000 in ?? () (gdb) l 280 275 { 276 dispatch_queue.lock.Lock(); 277 while (!dispatch_queue.stop) { 278 while (!dispatch_queue.queued_pipes.empty() && !dispatch_queue.stop) { 279 //get highest-priority pipe 280 map<int, xlist<Pipe *> >::reverse_iterator high_iter = 281 dispatch_queue.queued_pipes.rbegin(); 282 int priority = high_iter->first; 283 xlist<Pipe *>& pipe_list = high_iter->second; 284 (gdb) 285 Pipe *pipe = pipe_list.front(); 286 //move pipe to back of line -- or just take off if no more messages 287 pipe->pipe_lock.Lock(); 288 list<Message *>& m_queue = pipe->in_q[priority]; 289 Message *m = m_queue.front(); 290 m_queue.pop_front(); 291 292 if (m_queue.empty()) { 293 pipe_list.pop_front(); // pipe is done 294 if (pipe_list.empty()) (gdb) p pipe $3 = (class SimpleMessenger::Pipe *) 0x7fea08000c30
Running 0.21.3 e5882981b55f3c74d6b8b22a2bf5fbec81b775e6
Actions