Project

General

Profile

Actions

Bug #56382

closed

ONode ref counting is broken

Added by Telemetry Bot almost 2 years ago. Updated about 1 year ago.

Status:
Resolved
Priority:
Normal
Assignee:
Target version:
% Done:

100%

Source:
Telemetry
Tags:
backport_processed
Backport:
pacific quincy
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):

387bdad7e321a8b482ce7c25081e214416a524e4fcddc77dcd2295ccfe0a319b


Description

http://telemetry.front.sepia.ceph.com:4000/d/jByk5HaMz/crash-spec-x-ray?orgId=1&var-sig_v2=6bb2da74940c132cf3884cb97718e95088a109cfebb7a89f995ddc8fdf456f63

Sanitized backtrace:

    ceph::buffer::ptr::release()
    BlueStore::Onode::put()
    BlueStore::OnodeSpace::_remove(ghobject_t const&)
    LruOnodeCacheShard::_trim_to(unsigned long)
    BlueStore::OnodeSpace::add(ghobject_t const&, boost::intrusive_ptr<BlueStore::Onode>&)
    BlueStore::Collection::get_onode(ghobject_t const&, bool, bool)
    BlueStore::_txc_add_transaction(BlueStore::TransContext*, ceph::os::Transaction*)
    BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)
    non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<OpRequest>)
    ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&&, std::optional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)
    PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)
    PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)
    PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)
    PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)
    OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)
    ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)
    OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)
    ShardedThreadPool::shardedthreadpool_worker(unsigned int)
    ShardedThreadPool::WorkThreadSharded::entry()

Crash dump sample:
{
    "backtrace": [
        "/lib64/libpthread.so.0(+0x12ce0) [0x7fceedb98ce0]",
        "(ceph::buffer::v15_2_0::ptr::release()+0x31) [0x5648c0eafc81]",
        "(BlueStore::Onode::put()+0x1c9) [0x5648c0acafd9]",
        "(std::_Hashtable<ghobject_t, std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> >, mempool::pool_allocator<(mempool::pool_index_t)4, std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> > >, std::__detail::_Select1st, std::equal_to<ghobject_t>, std::hash<ghobject_t>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true> >::_M_erase(unsigned long, std::__detail::_Hash_node_base*, std::__detail::_Hash_node<std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> >, true>*)+0x68) [0x5648c0b94ed8]",
        "(BlueStore::OnodeSpace::_remove(ghobject_t const&)+0x29b) [0x5648c0acadbb]",
        "(LruOnodeCacheShard::_trim_to(unsigned long)+0x110) [0x5648c0b9a6f0]",
        "(BlueStore::OnodeSpace::add(ghobject_t const&, boost::intrusive_ptr<BlueStore::Onode>&)+0x49d) [0x5648c0acbb4d]",
        "(BlueStore::Collection::get_onode(ghobject_t const&, bool, bool)+0x46a) [0x5648c0b1adba]",
        "(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ceph::os::Transaction*)+0x1245) [0x5648c0b5c2e5]",
        "(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x316) [0x5648c0b5e496]",
        "(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x55) [0x5648c075e2e5]",
        "(ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> >&&, std::optional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)+0xca8) [0x5648c0978f98]",
        "(PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)+0xc90) [0x5648c06c4da0]",
        "(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x1097) [0x5648c07273b7]",
        "(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x395f) [0x5648c072b91f]",
        "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xe2e) [0x5648c073291e]",
        "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x332) [0x5648c05b3752]",
        "(ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x6f) [0x5648c0877bff]",
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xaf8) [0x5648c05d20d8]",
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x5648c0cd79c4]",
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x5648c0cd8d64]",
        "/lib64/libpthread.so.0(+0x81cf) [0x7fceedb8e1cf]",
        "clone()" 
    ],
    "ceph_version": "17.2.0",
    "crash_id": "2022-06-23T03:21:00.440853Z_59aaded8-9bdd-4a4d-a032-6117d87a9e7a",
    "entity_name": "osd.e3128680d062dd6c66a333f81cf3e709b36d9f1e",
    "os_id": "centos",
    "os_name": "CentOS Stream",
    "os_version": "8",
    "os_version_id": "8",
    "process_name": "ceph-osd",
    "stack_sig": "387bdad7e321a8b482ce7c25081e214416a524e4fcddc77dcd2295ccfe0a319b",
    "timestamp": "2022-06-23T03:21:00.440853Z",
    "utsname_machine": "x86_64",
    "utsname_release": "5.4.0-109-generic",
    "utsname_sysname": "Linux",
    "utsname_version": "#123-Ubuntu SMP Fri Apr 8 09:10:54 UTC 2022" 
}


Related issues 6 (0 open, 6 closed)

Has duplicate bluestore - Bug #57895: OSD crash in Onode::put() Duplicate

Actions
Is duplicate of RADOS - Bug #58218: osd — Duplicate

Actions
Has duplicate bluestore - Bug #59074: OSD restarted with this error: Caught signal (Segmentation fault) in thread thread_name:bstore_kv_final — Duplicate

Actions
Is duplicate of bluestore - Bug #53002: crash BlueStore::Onode::put from BlueStore::TransContext::~TransContext — Duplicate — Igor Fedotov

Actions
Copied to bluestore - Backport #58675: quincy: ONode ref counting is broken — Resolved — Igor Fedotov — Actions
Copied to bluestore - Backport #58676: pacific: ONode ref counting is broken — Resolved — Igor Fedotov — Actions
Actions

Also available in: Atom PDF