Project

General

Profile

Bug #6897

Ceph OSD crashed while running a rados test

Added by Tamilarasi muthamizhan over 10 years ago. Updated over 10 years ago.

Status:
Duplicate
Priority:
Urgent
Assignee:
-
Category:
-
Target version:
-
% Done:

0%

Source:
Q/A
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

logs: ubuntu@teuthology:/a/teuthology-2013-11-24_23:00:03-rados-master-testing-basic-plana/117237

     0> 2013-11-25 01:23:19.758927 7ff0cca12700 -1 *** Caught signal (Aborted) **
 in thread 7ff0cca12700

 ceph version 0.72-279-gb089adb (b089adb85af0a306e662918bba26fbd196f9192c)
 1: ceph-osd() [0x88850a]
 2: (()+0xfcb0) [0x7ff0e2192cb0]
 3: (gsignal()+0x35) [0x7ff0e068d425]
 4: (abort()+0x17b) [0x7ff0e0690b8b]
 5: (__gnu_cxx::__verbose_terminate_handler()+0x11d) [0x7ff0e0fe069d]
 6: (()+0xb5846) [0x7ff0e0fde846]
 7: (()+0xb5873) [0x7ff0e0fde873]
 8: (()+0xb596e) [0x7ff0e0fde96e]
 9: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1df) [0x94f2bf]
 10: (PG::activate(ObjectStore::Transaction&, unsigned int, std::list<Context*, std::allocator<Context*> >&, std::map<int, std::map<pg_t, pg_query_t, std::less<pg_t>, std::allocator<std::pair<pg_t const, p
g_query_t> > >, std::less<int>, std::allocator<std::pair<int const, std::map<pg_t, pg_query_t, std::less<pg_t>, std::allocator<std::pair<pg_t const, pg_query_t> > > > > >&, std::map<int, std::vector<std::p
air<pg_notify_t, std::map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > >, std::allocator<std::pair<pg_notify_t, std::map<unsigned in
t, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > > > >, std::less<int>, std::allocator<std::pair<int const, std::vector<std::pair<pg_notify_t, std:
:map<unsigned int, pg_interval_t, std::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > >, std::allocator<std::pair<pg_notify_t, std::map<unsigned int, pg_interval_t, std
::less<unsigned int>, std::allocator<std::pair<unsigned int const, pg_interval_t> > > > > > > > >*)+0x2cb4) [0x7981a4]
 11: (PG::RecoveryState::Active::Active(boost::statechart::state<PG::RecoveryState::Active, PG::RecoveryState::Primary, PG::RecoveryState::Activating, (boost::statechart::history_mode)0>::my_context)+0x39b
) [0x798c6b]
 12: (boost::statechart::state<PG::RecoveryState::Active, PG::RecoveryState::Primary, PG::RecoveryState::Activating, (boost::statechart::history_mode)0>::shallow_construct(boost::intrusive_ptr<PG::Recovery
State::Primary> const&, boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>&)+0x5c) [0x7cd01
c]
 13: (boost::statechart::detail::safe_reaction_result boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>
::transit_impl<PG::RecoveryState::Active, PG::RecoveryState::RecoveryMachine, boost::statechart::detail::no_transition_function>(boost::statechart::detail::no_transition_function const&)+0x94) [0x7cdad4]
 14: (boost::statechart::simple_state<PG::RecoveryState::Peering, PG::RecoveryState::Primary, PG::RecoveryState::GetInfo, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const
&, void const*)+0x192) [0x7cdd72]
 15: (boost::statechart::simple_state<PG::RecoveryState::WaitFlushedPeering, PG::RecoveryState::Peering, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl
_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, v
oid const*)+0x90) [0x7c80a0]
 16: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_queued_events()+0xfb) [0x
7ae1db]
 17: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::
event_base const&)+0x1e) [0x7ae35e]
 18: (PG::handle_activate_map(PG::RecoveryCtx*)+0x103) [0x769dd3]
 19: (OSD::advance_pg(unsigned int, PG*, ThreadPool::TPHandle&, PG::RecoveryCtx*, std::set<boost::intrusive_ptr<PG>, std::less<boost::intrusive_ptr<PG> >, std::allocator<boost::intrusive_ptr<PG> > >*)+0x42
0) [0x620dd0]
 20: (OSD::process_peering_events(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x1d3) [0x6210f3]
 21: (OSD::PeeringWQ::_process(std::list<PG*, std::allocator<PG*> > const&, ThreadPool::TPHandle&)+0x12) [0x668242]
 22: (ThreadPool::worker(ThreadPool::WorkThread*)+0x4e6) [0x9423d6]
 23: (ThreadPool::WorkThread::entry()+0x10) [0x9441e0]
 24: (()+0x7e9a) [0x7ff0e218ae9a]
 25: (clone()+0x6d) [0x7ff0e074b3fd]
 NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.

ubuntu@teuthology:/a/teuthology-2013-11-24_23:00:03-rados-master-testing-basic-plana/117237$ cat config.yaml 
archive_path: /var/lib/teuthworker/archive/teuthology-2013-11-24_23:00:03-rados-master-testing-basic-plana/117237
description: rados/thrash/{clusters/fixed-2.yaml fs/xfs.yaml msgr-failures/few.yaml
  thrashers/default.yaml workloads/snaps-many-objects.yaml}
email: null
job_id: '117237'
kernel: &id001
  kdb: true
  sha1: 68174f0c97e7c0561aa844059569e3cbf0a43de1
last_in_suite: false
machine_type: plana
name: teuthology-2013-11-24_23:00:03-rados-master-testing-basic-plana
nuke-on-error: true
os_type: ubuntu
overrides:
  admin_socket:
    branch: master
  ceph:
    conf:
      global:
        ms inject socket failures: 5000
      mon:
        debug mon: 20
        debug ms: 1
        debug paxos: 20
      osd:
        debug ms: 1
        debug osd: 5
        osd sloppy crc: true
    fs: xfs
    log-whitelist:
    - slow request
    sha1: b089adb85af0a306e662918bba26fbd196f9192c
  ceph-deploy:
    branch:
      dev: master
    conf:
      client:
        log file: /var/log/ceph/ceph-$name.$pid.log
      mon:
        debug mon: 1
        debug ms: 20
        debug paxos: 20
  install:
    ceph:
      sha1: b089adb85af0a306e662918bba26fbd196f9192c
  s3tests:
    branch: master
  workunit:
    sha1: b089adb85af0a306e662918bba26fbd196f9192c
owner: scheduled_teuthology@teuthology
roles:
- - mon.a
  - mon.c
  - osd.0
  - osd.1
  - osd.2
  - client.0
- - mon.b
  - mds.a
  - osd.3
  - osd.4
  - osd.5
  - client.1
targets:
  ubuntu@plana10.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDZM11QYmJVb8X/BHwLAOgozCK/ZG4XtzQZb5znJiFPwyi7vSZtCM7/DGWm6FGTUXg1bEcJHdD83EpvVgqTue/c6iTKCglzuB8eadW7gErwXjJgy/DCmJOHjmx/0fNCZi60nBVZQW3V1nptIgaFYHSFgPMhQSvEUg01ohfPlLKgnyeb6ciRxcDmJcrGpUrDNutxLf0aUC9gDF8tJ1nE5WV6tOSMNDpgVf9L3WwkxYj+G/77l+eSPqgDHs751C5iIzvt5eWTU+vfUd4uymPlBihPQXqnyfcriFJK8Tx80qofODJNxOLd9f+8wAUKXvY/pxSHL9h6sIrRI17TXPR9ip/V
  ubuntu@plana17.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDGfArz9/DyU/v0oqstjIzMHySVtZQwjWQ1iZOoJpuTdXUlHgSk2FciaFuCimoN4oxUH3PUfTIw2RO4BSTv1VPmpFsYKmlcYsFA0vv6CLS1MeF6vsuGz4nae93byu5X8WxzQ8pYeLXcGb6VGLASzaNp/Zp19s/EB3HuXT/Ch3E/Uz87dW+zizpgflhNsmDyK1MxN6JWZvm5M1fN9YmXwwg7hMB3GJBQKennelxhD9RudU9YvT9ekj637zGsmAfQrz6p5w3RoSkGdEh8rAJpOuSJPfO0pkWDvXZWzoqBsa4jRMSvF3eSh0WpO6xE4aRcr4vPzm/7ujxs8zoChgk5V4Lr
tasks:
- internal.lock_machines:
  - 2
  - plana
- internal.save_config: null
- internal.check_lock: null
- internal.connect: null
- internal.check_conflict: null
- internal.check_ceph_data: null
- internal.vm_setup: null
- kernel: *id001
- internal.base: null
- internal.archive: null
- internal.coredump: null
- internal.sudo: null
- internal.syslog: null
- internal.timer: null
- chef: null
- clock.check: null
- install: null
- ceph:
    log-whitelist:
    - wrongly marked me down
    - objects unfound and apparently lost
- thrashosds:
    chance_pgnum_grow: 1
    chance_pgpnum_fix: 1
    timeout: 1200
- rados:
    clients:
    - client.0
    objects: 500
    op_weights:
      copy_from: 50
      delete: 50
      read: 100
      rollback: 50
      snap_create: 50
      snap_remove: 50
      write: 100
    ops: 4000
teuthology_branch: master
verbose: true


Related issues

Duplicated by Ceph - Bug #6896: osd/PG.cc: 1302: FAILED assert(active == acting.size()) Resolved 11/25/2013

History

#1 Updated by David Zafman over 10 years ago

  • Status changed from New to Duplicate

This was already fixed today as #6896, which came from this same run.

Also available in: Atom PDF