Actions
Bug #5389
closed — osd: op_tp timeout on big cluster + radosmodel
% Done:
0%
Source:
Q/A
Tags:
Backport:
Regression:
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
No errors in kern.log, so we can't blame this on the kernel.
ubuntu@teuthology:/a/teuthology-2013-06-17_20:32:15-big-master-testing-basic/38628$ cat orig.config.yaml kernel: kdb: true sha1: dbb898fa64ead2446a8e7e40b90ab55b2e066e09 machine_type: plana nuke-on-error: true overrides: ceph: conf: mon: debug mon: 20 debug ms: 20 debug paxos: 20 log-whitelist: - slow request sha1: 47ce702ce6230f2404bf0b1cb051d78489537469 install: ceph: sha1: 47ce702ce6230f2404bf0b1cb051d78489537469 s3tests: branch: master workunit: sha1: 47ce702ce6230f2404bf0b1cb051d78489537469 roles: - - osd.0 - osd.1 - osd.2 - client.0 - mon.a - - osd.3 - osd.4 - osd.5 - client.1 - mon.b - - osd.6 - osd.7 - osd.8 - client.2 - mon.c - - osd.9 - osd.10 - osd.11 - client.3 - mon.d - - osd.12 - osd.13 - osd.14 - client.4 - mon.e - - osd.15 - osd.16 - osd.17 - client.5 - - osd.18 - osd.19 - osd.20 - client.6 - - osd.21 - osd.22 - osd.23 - client.7 - - osd.24 - osd.25 - osd.26 - client.8 - - osd.27 - osd.28 - osd.29 - client.9 - - osd.30 - osd.31 - osd.32 - client.10 - - osd.33 - osd.34 - osd.35 - client.11 - - osd.36 - osd.37 - osd.38 - client.12 - - osd.39 - osd.40 - osd.41 - client.13 - - osd.42 - osd.43 - osd.44 - client.14 - - osd.45 - osd.46 - osd.47 - client.15 - - osd.48 - osd.49 - osd.50 - client.16 - - osd.51 - osd.52 - osd.53 - client.17 - - osd.54 - osd.55 - osd.56 - client.18 - - osd.57 - osd.58 - osd.59 - client.19 - - osd.60 - osd.61 - osd.62 - client.20 tasks: - chef: null - clock.check: null - install: null - ceph: null - rados: objects: 50 op_weights: delete: 50 read: 100 rollback: 50 snap_create: 50 snap_remove: 50 write: 100 ops: 4000
Updated by Samuel Just almost 11 years ago
Pertinent threads: Thread 32 (Thread 0x7f816c33a700 (LWP 16653)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in _L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x00000000006e9079 in PG::lock(bool) () #5 0x0000000000668599 in OSD::OpWQ::_process(boost::intrusive_ptr<PG>) () #6 0x00000000006a4861 in ThreadPool::WorkQueueVal<std::pair<boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest> >, boost::intrusive_ptr<PG> >::_process(boost::intrusive_ptr<PG>, ThreadPool::TPHandle&) () #7 0x00000000006a4bcc in ThreadPool::WorkQueueVal<std::pair<boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest> >, boost::intrusive_ptr<PG> >::_void_process(void*, ThreadPool::TPHandle&) () #8 0x00000000008a4206 in ThreadPool::worker(ThreadPool::WorkThread*) () #9 0x00000000008a6030 in ThreadPool::WorkThread::entry() () #10 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #11 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #12 0x0000000000000000 in ?? 
() Thread 28 (Thread 0x7f8173348700 (LWP 16626)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in _L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x00000000006e9079 in PG::lock(bool) () #5 0x00000000005e8cce in ReplicatedPG::op_applied(ReplicatedPG::RepGather*) () #6 0x000000000062291a in Context::complete(int) () #7 0x00000000006a245d in finish_contexts(CephContext*, std::list<Context*, std::allocator<Context*> >&, int) () #8 0x00000000008012b0 in Finisher::finisher_thread_entry() () #9 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #10 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #11 0x0000000000000000 in ?? () Thread 27 (Thread 0x7f8171b45700 (LWP 16642)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in _L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x00000000006e9079 in PG::lock(bool) () #5 0x000000000068d4be in OSD::consume_map() () #6 0x000000000069958f in OSD::handle_osd_map(MOSDMap*) () #7 0x000000000069c2bb in OSD::_dispatch(Message*) () #8 0x000000000069c9c6 in OSD::ms_dispatch(Message*) () #9 0x000000000096ff39 in DispatchQueue::entry() () #10 0x000000000089905d in DispatchQueue::DispatchThread::entry() () #11 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #12 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #13 0x0000000000000000 in ?? 
() Thread 24 (Thread 0x7f816bb39700 (LWP 16654)): #0 0x00007f817e4d8d84 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00000000007b9159 in IndexManager::get_index(coll_t, char const*, std::tr1::shared_ptr<CollectionIndex>*) () #2 0x000000000076e598 in FileStore::get_index(coll_t, std::tr1::shared_ptr<CollectionIndex>*) () #3 0x00000000007921cf in FileStore::lfn_open(coll_t, hobject_t const&, bool, std::tr1::shared_ptr<FDCache::FD>*, std::tr1::shared_ptr<CollectionIndex::Path>*, std::tr1::shared_ptr<CollectionIndex>*) () #4 0x0000000000795d4d in FileStore::getattr(coll_t, hobject_t const&, char const*, ceph::buffer::ptr&) () #5 0x00000000005efde6 in ReplicatedPG::get_object_context(hobject_t const&, object_locator_t const&, bool) () #6 0x000000000060d9fd in ReplicatedPG::prepare_transaction(ReplicatedPG::OpContext*) () #7 0x0000000000614dc8 in ReplicatedPG::do_op(std::tr1::shared_ptr<OpRequest>) () #8 0x00000000006fa940 in PG::do_request(std::tr1::shared_ptr<OpRequest>) () #9 0x0000000000651b13 in OSD::dequeue_op(boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest>) () #10 0x00000000006689fb in OSD::OpWQ::_process(boost::intrusive_ptr<PG>) () #11 0x00000000006a4861 in ThreadPool::WorkQueueVal<std::pair<boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest> >, boost::intrusive_ptr<PG> >::_process(boost::intrusive_ptr<PG>, ThreadPool::TPHandle&) () #12 0x00000000006a4bcc in ThreadPool::WorkQueueVal<std::pair<boost::intrusive_ptr<PG>, std::tr1::shared_ptr<OpRequest> >, boost::intrusive_ptr<PG> >::_void_process(void*, ThreadPool::TPHandle&) () #13 0x00000000008a4206 in ThreadPool::worker(ThreadPool::WorkThread*) () #14 0x00000000008a6030 in ThreadPool::WorkThread::entry() () #15 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 Thread 16 (Thread 0x7f817434a700 (LWP 16620)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in 
_L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x0000000000791dbe in FileStore::lfn_open(coll_t, hobject_t const&, bool, std::tr1::shared_ptr<FDCache::FD>*, std::tr1::shared_ptr<CollectionIndex::Path>*, std::tr1::shared_ptr<CollectionIndex>*) () #5 0x000000000079ae31 in FileStore::_clone(coll_t, hobject_t const&, hobject_t const&, SequencerPosition const&) () #6 0x000000000079e529 in FileStore::_do_transaction(ObjectStore::Transaction&, unsigned long, int) () #7 0x00000000007a2321 in FileStore::_do_transactions(std::list<ObjectStore::Transaction*, std::allocator<ObjectStore::Transaction*> >&, unsigned long, ThreadPool::TPHandle*) () #8 0x00000000007a25b6 in FileStore::_do_op(FileStore::OpSequencer*, ThreadPool::TPHandle&) () #9 0x00000000008a4206 in ThreadPool::worker(ThreadPool::WorkThread*) () #10 0x00000000008a6030 in ThreadPool::WorkThread::entry() () #11 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #12 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #13 0x0000000000000000 in ?? () Thread 10 (Thread 0x7f8172b47700 (LWP 16629)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in _L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x00000000006e9079 in PG::lock(bool) () #5 0x00000000005e4f7c in ReplicatedPG::op_commit(ReplicatedPG::RepGather*) () #6 0x00000000008012b0 in Finisher::finisher_thread_entry() () #7 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #8 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #9 0x0000000000000000 in ?? 
() Thread 6 (Thread 0x7f8170b43700 (LWP 16644)): #0 0x00007f817e4db89c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0 #1 0x00007f817e4d7065 in _L_lock_858 () from /lib/x86_64-linux-gnu/libpthread.so.0 #2 0x00007f817e4d6eba in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 #3 0x0000000000876743 in Mutex::Lock(bool) () #4 0x000000000069c822 in OSD::ms_dispatch(Message*) () #5 0x000000000096ff39 in DispatchQueue::entry() () #6 0x000000000089905d in DispatchQueue::DispatchThread::entry() () #7 0x00007f817e4d4e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #8 0x00007f817c667ccd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #9 0x0000000000000000 in ?? ()
Actions