Actions
Bug #53713
closed[pwl] persistent cache hits abort when running rbd bench
Status:
Closed
Priority:
High
Assignee:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
pacific
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
1. clone latest pacific upstream branch
2. compile ceph with options -DWITH_RBD_SSD_CACHE=ON -DWITH_RBD_RWL=ON
3. start vstart with pwl cache:
MON=1 OSD=1 MGR=0 MDS=0 RGW=0 ../src/vstart.sh -n -d -o "debug librbd = 20 debug rbd = 20 rbd_default_features = 61 rbd_persistent_cache_mode = rwl rbd_persistent_cache_path = /home/ideepika/ceph2/ceph/build/write_back_cache rbd_persistent_cache_size = 1073741824 rbd_plugins = pwl_cache"
cache status during rbd bench run:
Watchers: watcher=172.21.10.3:0/3672603674 client.4113 cookie=140392213283296 Image cache state: {"present":"true","empty":"true","clean":"true","cache_type":"rwl","pwl_host":"vossi03","pwl_path":"/home/ideepika/ceph2/ceph/build/write_back_cache/rbd-pwl.test1.100d7102aa61.pool","pwl_size":1073741824}
4. create pool, image and run rbd bench:
bin/ceph osd pool create test1 128;bin/rbd create --size 1024 test1/image1;bin/rbd ls test1;bin/rbd bench --io-type write image1 --pool=test1 --io-threads=16
the ios error out hitting ceph_abort with below stacktrace, I am yet to verify it on master
/home/ideepika/ceph2/ceph/src/common/lockdep.cc: 171: ceph_abort_msg("abort() called") ceph version 16.2.4-2006-gbb6237f728a (bb6237f728a28d0a16efe3ac8107d7882d53d03c) pacific (stable) 1: (ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0xfe) [0x7fafe347b926] 2: /home/ideepika/ceph2/ceph/build/lib/libceph-common.so.2(+0x17f0e46) [0x7fafe3580e46] 3: (lockdep_register(char const*)+0x24) [0x7fafe358123d] 4: (ceph::mutex_debug_detail::mutex_debugging_base::_register()+0x20) [0x7fafe358e48a] 5: (ceph::mutex_debug_detail::mutex_debugging_base::mutex_debugging_base(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, bool)+0x9a) [0x7fafe358e3ec] 6: (ceph::mutex_debug_detail::mutex_debug_impl<false>::mutex_debug_impl(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, bool)+0x66) [0x55825221f108] 7: (ceph::mutex_debug_detail::mutex_debug_impl<false> ceph::make_mutex<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&&)+0x58) [0x558252405e62] 8: (librbd::cache::pwl::WriteLogEntry::WriteLogEntry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x11d) [0x7faf9af9530b] 9: (librbd::cache::pwl::rwl::WriteLogEntry::WriteLogEntry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x56) [0x7faf9afb63b8] 10: (void __gnu_cxx::new_allocator<librbd::cache::pwl::rwl::WriteLogEntry>::construct<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(librbd::cache::pwl::rwl::WriteLogEntry*, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x99) [0x7faf9afc5e1d] 11: (void std::allocator_traits<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >::construct<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>&, librbd::cache::pwl::rwl::WriteLogEntry*, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x62) [0x7faf9afc561e] 12: (std::_Sp_counted_ptr_inplace<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, (__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr_inplace<std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0xd5) [0x7faf9afc48f5] 13: (std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(librbd::cache::pwl::rwl::WriteLogEntry*&, std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x100) [0x7faf9afc3a38] 14: (std::__shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x7a) [0x7faf9afc336e] 15: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry>::shared_ptr<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::_Sp_alloc_shared_tag<std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> >, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x67) [0x7faf9afc2eef] 16: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry> std::allocate_shared<librbd::cache::pwl::rwl::WriteLogEntry, std::allocator<librbd::cache::pwl::rwl::WriteLogEntry>, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::allocator<librbd::cache::pwl::rwl::WriteLogEntry> const&, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x62) [0x7faf9afc289d] 17: (std::shared_ptr<librbd::cache::pwl::rwl::WriteLogEntry> std::make_shared<librbd::cache::pwl::rwl::WriteLogEntry, std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&>(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>&, unsigned long&, unsigned long&)+0x7e) [0x7faf9afc1f10] 18: (librbd::cache::pwl::rwl::Builder<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::create_write_log_entry(std::shared_ptr<librbd::cache::pwl::SyncPointLogEntry>, unsigned long, unsigned long)+0x43) [0x7faf9afc1693] 19: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::create_operation(unsigned long, unsigned long)+0xbf) [0x7faf9af80849] 20: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::setup_log_operations(librbd::cache::pwl::DeferredContexts&)+0x4bf) [0x7faf9af80dc7] 21: (librbd::cache::pwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >::dispatch()+0x1d1) [0x7faf9af8057f] 22: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::alloc_and_dispatch_io_req(librbd::cache::pwl::C_BlockIORequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >*)+0x205) [0x7faf9af3386d] 23: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)::{lambda(librbd::cache::pwl::GuardedRequestFunctionContext&)#2}::operator()(librbd::cache::pwl::GuardedRequestFunctionContext&) const+0x41) [0x7faf9af3acbb] 24: (boost::detail::function::void_function_obj_invoker1<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)::{lambda(librbd::cache::pwl::GuardedRequestFunctionContext&)#2}, void, librbd::cache::pwl::GuardedRequestFunctionContext&>::invoke(boost::detail::function::function_buffer&, librbd::cache::pwl::GuardedRequestFunctionContext&)+0x2b) [0x7faf9af51002] 25: (boost::function1<void, librbd::cache::pwl::GuardedRequestFunctionContext&>::operator()(librbd::cache::pwl::GuardedRequestFunctionContext&) const+0x6c) [0x7faf9af3a288] 26: (librbd::cache::pwl::GuardedRequestFunctionContext::finish(int)+0x42) [0x7faf9af2856c] 27: (Context::complete(int)+0x27) [0x5582522535e1] 28: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::detain_guarded_request(librbd::cache::pwl::C_BlockIORequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >*, librbd::cache::pwl::GuardedRequestFunctionContext*, bool)+0x249) [0x7faf9af2e827] 29: (librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>::write(std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, int, Context*)+0x515) [0x7faf9af2b2b5] 30: (librbd::cache::WriteLogImageDispatch<librbd::ImageCtx>::write(librbd::io::AioCompletion*, std::vector<std::pair<unsigned long, unsigned long>, std::allocator<std::pair<unsigned long, unsigned long> > >&&, ceph::buffer::v15_2_0::list&&, std::shared_ptr<neorados::IOContext>, int, ZTracer::Trace const&, unsigned long, std::atomic<unsigned int>*, librbd::io::DispatchResult*, Context**, Context*)+0x282) [0x7faf9af1890e]
Actions