Project

General

Profile

Actions

Bug #53713

closed

[pwl] persistent cache hits abort when running rbd bench

Added by Deepika Upadhyay over 2 years ago. Updated over 2 years ago.

Status:
Closed
Priority:
High
Assignee:
-
Target version:
-
% Done:

0%

Source:
Tags:
Backport:
pacific
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

1. clone latest pacific upstream branch
2. compile ceph with options -DWITH_RBD_SSD_CACHE=ON -DWITH_RBD_RWL=ON
3. start vstart with pwl cache:

 MON=1 OSD=1 MGR=0 MDS=0 RGW=0 ../src/vstart.sh -n -d -o "debug librbd = 20
debug rbd = 20
rbd_default_features = 61
rbd_persistent_cache_mode = rwl
rbd_persistent_cache_path = /home/ideepika/ceph2/ceph/build/write_back_cache
rbd_persistent_cache_size = 1073741824
rbd_plugins = pwl_cache" 

cache status during rbd bench run:

Watchers:
    watcher=172.21.10.3:0/3672603674 client.4113 cookie=140392213283296
Image cache state: {"present":"true","empty":"true","clean":"true","cache_type":"rwl","pwl_host":"vossi03","pwl_path":"/home/ideepika/ceph2/ceph/build/write_back_cache/rbd-pwl.test1.100d7102aa61.pool","pwl_size":1073741824}

4. create pool, image and run rbd bench:

 bin/ceph osd pool create test1 128;bin/rbd create --size 1024 test1/image1;bin/rbd ls test1;bin/rbd bench --io-type write image1 --pool=test1 --io-threads=16

the ios error out hitting ceph_abort with below stacktrace, I am yet to verify it on master

/home/ideepika/ceph2/ceph/src/common/lockdep.cc: 171: ceph_abort_msg("abort() called")

 ceph version 16.2.4-2006-gbb6237f728a (bb6237f728a28d0a16efe3ac8107d7882d53d03c) pacific (stable)
 1: (ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0xfe) [0x7fafe347b926]
 2: /home/ideepika/ceph2/ceph/build/lib/libceph-common.so.2(+0x17f0e46) [0x7fafe3580e46]
 3: (lockdep_register(char const*)+0x24) [0x7fafe358123d]
 4: (ceph::mutex_debug_detail::mutex_debugging_base::_register()+0x20) [0x7fafe358e48a]
 5: (ceph::mutex_debug_detail::mutex_debugging_base::mutex_debugging_base(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, bool)+0x9a) [0x7fafe358e3ec]
 6: (ceph::mutex_debug_detail::mutex_debug_impl<false>::mutex_debug_impl(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, bool)+0x66) [0x55825221f108]
 7: (ceph::mutex_debug_detail::mutex_debug_impl<false> ceph::make_mutex<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&&)+0x58) [0x558252405e62]
 8: (librbd::cache::pwl::WriteLogEntry::WriteLogEntry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x11d) [0x7faf9af9530b]
 9: (librbd::cache::pwl::rwl::WriteLogEntry::WriteLogEntry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x56) [0x7faf9afb63b8]
 10: (void __gnu_cxx::new_allocator<librbd::cache::pwl::rwl::WriteLogEntry>::construct<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(librbd::cache::pwl::rwl::WriteLogEntry*, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x99) [0x7faf9afc5e1d]
 11: (void std::allocator_traits<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >::construct<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>&, librbd::cache::pwl::rwl::WriteLogEntry*, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x62) [0x7faf9afc561e]
 12: (std::_Sp_counted_ptr_inplace<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, (__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr_inplace<std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0xd5) [0x7faf9afc48f5]
 13: (std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(librbd::cache::pwl::rwl::WriteLogEntry*&, std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x100) [0x7faf9afc3a38]
 14: (std::__shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x7a) [0x7faf9afc336e]
 15: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry>::shared_ptr<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x67) [0x7faf9afc2eef]
 16: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry> std::allocate_shared<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> const&, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x62) [0x7faf9afc289d]
 17: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry> std::make_shared<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x7e) [0x7faf9afc1f10]
 18: (librbd::cache::pwl::rwl::Builder<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::create_write_log_entry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x43) [0x7faf9afc1693]
 19: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::create_operation(unsigned long, unsigned long)+0xbf) [0x7faf9af80849]
 20: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::setup_log_operations(librbd::cache::pwl::DeferredContexts&)+0x4bf) [0x7faf9af80dc7]
 21: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::dispatch()+0x1d1) [0x7faf9af8057f]
 22: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::alloc_and_dispatch_io_req(librbd::cache::pwl::C_BlockIORequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >*)+0x205) [0x7faf9af3386d]
 23: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)::{lambda(librbd::cache::pwl::GuardedRequestFunctionContext&)#2}::operator()(librbd::cache::pwl::GuardedRequestFunctionContext&) const+0x41) [0x7faf9af3acbb]
 24: (boost::detail::function::void_function_obj_invoker1<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)::{lambda(librbd::cache::pwl::GuardedRequestFunctionContext&)#2}, void, librbd::cache::pwl::GuardedRequestFunctionContext&>::invoke(boost::detail::function::function_buffer&, librbd::cache::pwl::GuardedRequestFunctionContext&)+0x2b) [0x7faf9af51002]
 25: (boost::function1<void, librbd::cache::pwl::GuardedRequestFunctionContext&>::operator()(librbd::cache::pwl::GuardedRequestFunctionContext&) const+0x6c) [0x7faf9af3a288]
 26: (librbd::cache::pwl::GuardedRequestFunctionContext::finish(int)+0x42) [0x7faf9af2856c]
 27: (Context::complete(int)+0x27) [0x5582522535e1]
 28: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::detain_guarded_request(librbd::cache::pwl::C_BlockIORequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >*, librbd::cache::pwl::GuardedRequestFunctionContext*, bool)+0x249) [0x7faf9af2e827]
 29: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)+0x515) [0x7faf9af2b2b5]
 30: (librbd::cache::WriteLogImageDispatch<librbd::ImageCtx>::write(librbd::io::AioCompletion*, std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, std::shared_ptr<neorados::IOContext>, int, ZTracer::Trace const&, unsigned long, std::atomic<unsigned int>*, librbd::io::DispatchResult*, Context**, Context*)+0x282) [0x7faf9af1890e]
Actions

Also available in: Atom PDF