Actions
Bug #8777
closed — osd/PGLog.h: 88: FAILED assert(rollback_info_trimmed_to_riter == log.rbegin())
% Done:
0%
Source:
Q/A
Tags:
Backport:
Regression:
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Logs are in http://qa-proxy.ceph.com/teuthology/teuthology-2014-07-07_02:32:01-rados-master-testing-basic-plana/347163/
coredump info from @teuthology:/a/teuthology-2014-07-07_02:32:01-rados-master-testing-basic-plana/347163/remote/plana46/log/ceph-osd.4.log.gz
307656754- -6> 2014-07-07 21:20:32.324319 7f806679f700 7 osd.4 245 do_queries querying osd.3 on 1 PGs 307656850- -5> 2014-07-07 21:20:32.324321 7f805df8e700 5 filestore(/var/lib/ceph/osd/ceph-4) queue_transactions existing osr(2.7e 0x2b6e7f0)/0x2b6e7f0 307656995- -4> 2014-07-07 21:20:32.324327 7f805df8e700 2 filestore(/var/lib/ceph/osd/ceph-4) waiting 51 > 50 ops || 59778 > 104857600 307657123- -3> 2014-07-07 21:20:32.324323 7f806679f700 1 -- 10.214.132.32:6816/15743 --> 10.214.132.32:6801/15096 -- pg_query(1.e6 epoch 245) v3 -- ?+0 0x2862000 con 0x3158160 307657293- -2> 2014-07-07 21:20:32.324346 7f806679f700 5 filestore(/var/lib/ceph/osd/ceph-4) queue_transactions new osr(1.e6 0x34de7d0)/0x34de7d0 307657433- -1> 2014-07-07 21:20:32.324356 7f806679f700 2 filestore(/var/lib/ceph/osd/ceph-4) waiting 51 > 50 ops || 60358 > 104857600 307657561: 0> 2014-07-07 21:20:32.327917 7f805e78f700 -1 *** Caught signal (Aborted) ** 307657643- in thread 7f805e78f700 307657667- 307657668- ceph version 0.82-465-gdd5b60b (dd5b60b4f1c65702a71111e1e63346f3a056057d) 307657743- 1: ceph-osd() [0x9a823a] 307657769- 2: (()+0xfcb0) [0x7f8076a03cb0] 307657802- 3: (gsignal()+0x35) [0x7f8074efe425] 307657840- 4: (abort()+0x17b) [0x7f8074f01b8b] 307657877- 5: (__gnu_cxx::__verbose_terminate_handler()+0x11d) [0x7f807585169d] 307657947- 6: (()+0xb5846) [0x7f807584f846] 307657981- 7: (()+0xb5873) [0x7f807584f873] 307658015- 8: (()+0xb596e) [0x7f807584f96e] 307658049- 9: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1df) [0xa8c9df] 307658141- 10: (PG::RecoveryState::Stray::react(PG::MLogRec const&)+0x8c3) [0x789593] 307658217- 11: (boost::statechart::simple_state<PG::RecoveryState::Stray, PG::RecoveryState::Started, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, 
(boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x182) [0x7b8e52] 307658644- 12: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::send_event(boost::statechart::event_base const&)+0x5b) [0x79cd8b] 307658882- 13: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x11) [0x79d0e1] 307659123- 14: (PG::handle_peering_event(std::tr1::shared_ptr<PG::CephPeeringEvt>, PG::RecoveryCtx*)+0x303) [0x7540f3] 307659232- 15: (OSD::process_peering_events(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x2ce) [0x664bbe] 307659352- 16: (OSD::PeeringWQ::_process(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x12) [0x6b9392] 307659468- 17: (ThreadPool::worker(ThreadPool::WorkThread*)+0x4e6) [0xa7c0e6] 307659536- 18: (ThreadPool::WorkThread::entry()+0x10) [0xa7f190] 307659591- 19: (()+0x7e9a) [0x7f80769fbe9a] 307659625- 20: (clone()+0x6d) [0x7f8074fbc3fd] 307659662- NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. 307659755- 307659756---- logging levels ---
archive_path: /var/lib/teuthworker/archive/teuthology-2014-07-07_02:32:01-rados-master-testing-basic-plana/347163 branch: master description: rados/thrash/{clusters/fixed-2.yaml fs/ext4.yaml msgr-failures/few.yaml thrashers/default.yaml workloads/cache.yaml} email: null job_id: '347163' kernel: &id001 kdb: true sha1: 20bdcbcb52ac95397901581086368dd79c06091f last_in_suite: false machine_type: plana name: teuthology-2014-07-07_02:32:01-rados-master-testing-basic-plana nuke-on-error: true os_type: ubuntu overrides: admin_socket: branch: master ceph: conf: global: ms inject socket failures: 5000 mon: debug mon: 20 debug ms: 1 debug paxos: 20 osd: debug filestore: 20 debug journal: 20 debug ms: 1 debug osd: 20 fs: ext4 log-whitelist: - slow request - must scrub before tier agent can activate sha1: dd5b60b4f1c65702a71111e1e63346f3a056057d ceph-deploy: branch: dev: master conf: client: log file: /var/log/ceph/ceph-$name.$pid.log mon: debug mon: 1 debug ms: 20 debug paxos: 20 osd default pool size: 2 install: ceph: sha1: dd5b60b4f1c65702a71111e1e63346f3a056057d s3tests: branch: master workunit: sha1: dd5b60b4f1c65702a71111e1e63346f3a056057d owner: scheduled_teuthology@teuthology priority: 1000 roles: - - mon.a - mon.c - osd.0 - osd.1 - osd.2 - client.0 - - mon.b - mds.a - osd.3 - osd.4 - osd.5 - client.1 suite: rados targets: ubuntu@plana46.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDhaevKCLj5pgAP+JcawoIx7omJ0D2cmFkHtKO3fImFhHLWxD/bKypBQcM2w5svLpND2L6Gd8NuKs5lskklYclQUoTC8RlIMGVRRQJJQYWXAcCBq7MVwNOcIh1WiBI4o3JoufnH51eyY99kMNwn/G1gx9XQ3AofLMDFh/XWkkSa53BNXhj8fpsBXI0ISdm5c2Zvln8FYfsHnIbQZPLv2tSkYgParodlJdwBXmTaKCabXIQmWkngXRg28Mt04ZbxGlTbkJIehy5O6PuIG5BimrkOYlQI3m4s+hWCiXvZUOyMEKVQ1+m3C13XpYU7+LfpAVySFZiOLENdKuaVnG/tLz7f ubuntu@plana49.front.sepia.ceph.com: ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQC8zmyBYSlUYcY+7hO2WPa2uECTc3U4k/Sm7DY/YZAEtc0Z8i1FpvFa1MA1PV3g2DAzo+E6JIBFzbFSrgXeDsEMFl1Qy2jdrvnq66veVOEyDk4y7I4wseE/eTSnhTQE720cnWq1DuVyFgcyRvYGJo3HHoFTgxLwvPYdhfLPq8elBNJmuOllsqCgsmX5rwqZmsQcP29aus/1s3IVF1TEHYmJLTwRb8qW6W3r55C2ZbSwrvvNXb7zFyydCgAfPC+8dfqiMfp3T7Gc56HUHAWvXU9EnxDINHQmLhrdccFyq0BVd+i2m1ubuXULK2RMImFojhEnkLTFH2Qa5hSegvLLffab tasks: - internal.lock_machines: - 2 - plana - internal.save_config: null - internal.check_lock: null - internal.connect: null - internal.serialize_remote_roles: null - internal.check_conflict: null - internal.check_ceph_data: null - internal.vm_setup: null - kernel: *id001 - internal.base: null - internal.archive: null - internal.coredump: null - internal.sudo: null - internal.syslog: null - internal.timer: null - chef: null - clock.check: null - install: null - ceph: conf: osd: osd debug reject backfill probability: 0.3 log-whitelist: - wrongly marked me down - objects unfound and apparently lost - thrashosds: chance_pgnum_grow: 1 chance_pgpnum_fix: 1 timeout: 1200 - exec: client.0: - ceph osd pool create base 4 - ceph osd pool create cache 4 - ceph osd tier add base cache - ceph osd tier cache-mode cache writeback - ceph osd tier set-overlay base cache - ceph osd pool set cache hit_set_type bloom - ceph osd pool set cache hit_set_count 8 - ceph osd pool set cache hit_set_period 3600 - rados: clients: - client.0 objects: 500 op_weights: copy_from: 50 delete: 50 evict: 50 flush: 50 read: 100 try_flush: 50 write: 100 ops: 4000 pools: - base teuthology_branch: master tube: plana verbose: false worker_log: /var/lib/teuthworker/archive/worker_logs/worker.plana.20456
description: rados/thrash/{clusters/fixed-2.yaml fs/ext4.yaml msgr-failures/few.yaml thrashers/default.yaml workloads/cache.yaml} duration: 1711.7090220451355 failure_reason: timed out waiting for admin_socket to appear after osd.4 restart flavor: basic mon.a-kernel-sha1: 20bdcbcb52ac95397901581086368dd79c06091f mon.b-kernel-sha1: 20bdcbcb52ac95397901581086368dd79c06091f owner: scheduled_teuthology@teuthology success: false
Actions