Actions
Bug #43955
open: OSD in metadata CRUSH rule crashes after bucket reshard
Status:
New
Priority:
Normal
Assignee:
-
Category:
OSD
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
I have a Ceph cluster for Object Storage (RADOS Gateway). The cluster uses separate CRUSH rules for metadata (SSD) and data (HDD).
Recently, I resharded some buckets in the cluster; since then, two OSDs have occasionally gone down without any apparent reason.
I used `ceph crash info` to get information about the crash.
Please check for me.
{ "crash_id": "2020-02-03_06:40:46.163705Z_9db045c8-e7ca-45f6-9b21-9daae70ab2dc", "timestamp": "2020-02-03 06:40:46.163705Z", "process_name": "ceph-osd", "entity_name": "osd.4", "ceph_version": "14.2.3", "utsname_hostname": "snappy-ceph-s3-02", "utsname_sysname": "Linux", "utsname_release": "3.10.0-957.10.1.el7.x86_64", "utsname_version": "#1 SMP Mon Mar 18 15:06:45 UTC 2019", "utsname_machine": "x86_64", "os_name": "CentOS Linux", "os_id": "centos", "os_version_id": "7", "os_version": "7 (Core)", "backtrace": [ "(()+0xf5d0) [0x7f126217e5d0]", "(pread64()+0x33) [0x7f126217df63]", "(KernelDevice::read(unsigned long, unsigned long, ceph::buffer::v14_2_0::list*, IOContext*, bool)+0x12a) [0x555fa6a79c8a]", "(BlueFS::_read(BlueFS::FileReader*, BlueFS::FileReaderBuffer*, unsigned long, unsigned long, ceph::buffer::v14_2_0::list*, char*)+0x4c1) [0x555fa6a32501]", "(BlueRocksRandomAccessFile::Prefetch(unsigned long, unsigned long)+0x2a) [0x555fa6a5c2fa]", "(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter, rocksdb::Slice>::InitDataBlock()+0x2bd) [0x555fa704ce5d]", "(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter, rocksdb::Slice>::FindKeyForward()+0x8d) [0x555fa704cf5d]", "(()+0x1006d89) [0x555fa6fcad89]", "(rocksdb::MergingIterator::Next()+0x42) [0x555fa7060422]", "(rocksdb::DBIter::FindNextUserEntryInternal(bool, bool)+0xba2) [0x555fa6f595a2]", "(rocksdb::DBIter::Seek(rocksdb::Slice const&)+0x54c) [0x555fa6f5a6dc]", "(RocksDBStore::RocksDBWholeSpaceIteratorImpl::lower_bound(std::string const&, std::string const&)+0x44) [0x555fa69bebf4]", "(BlueStore::OmapIteratorImpl::OmapIteratorImpl(boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, std::shared_ptr<KeyValueDB::IteratorImpl>)+0x11d) [0x555fa68f603d]", "(BlueStore::get_omap_iterator(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, ghobject_t const&)+0x1ac) [0x555fa6932f9c]", "(PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> 
>&)+0x1f1b) [0x555fa66696bb]", "(cls_cxx_map_get_vals(void*, std::string const&, std::string const&, unsigned long, std::map<std::string, ceph::buffer::v14_2_0::list, std::less<std::string>, std::allocator<std::pair<std::string const, ceph::buffer::v14_2_0::list> > >*, bool*)+0x1d5) [0x555fa676d365]", "(()+0xb1b2f) [0x7f1250fa6b2f]", "(()+0xbec69) [0x7f1250fb3c69]", "(ClassHandler::ClassMethod::exec(void*, ceph::buffer::v14_2_0::list&, ceph::buffer::v14_2_0::list&)+0x34) [0x555fa6548ef4]", "(PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0x149e) [0x555fa6668c3e]", "(PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x97) [0x555fa667bed7]", "(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x2f8) [0x555fa667c678]", "(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x47c6) [0x555fa6682426]", "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xbd4) [0x555fa6683174]", "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x1a9) [0x555fa64bc849]", "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x555fa675b0a2]", "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x9f4) [0x555fa64d9a74]", "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x433) [0x555fa6add613]", "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x555fa6ae06b0]", "(()+0x7dd5) [0x7f1262176dd5]", "(clone()+0x6d) [0x7f126103cead]" ]
Updated by Sa Pham over 4 years ago
I have a Ceph cluster for Object Storage (RADOS Gateway). The cluster uses separate CRUSH rules for metadata (SSD) and data (HDD).
Recently, I resharded some buckets in the cluster; since then, two OSDs have occasionally gone down without any apparent reason.
I used `ceph crash info` to get information about the crash.
Please check for me.
{ "crash_id": "2020-02-03_06:40:46.163705Z_9db045c8-e7ca-45f6-9b21-9daae70ab2dc", "timestamp": "2020-02-03 06:40:46.163705Z", "process_name": "ceph-osd", "entity_name": "osd.4", "ceph_version": "14.2.3", "utsname_sysname": "Linux", "utsname_release": "3.10.0-957.10.1.el7.x86_64", "utsname_version": "#1 SMP Mon Mar 18 15:06:45 UTC 2019", "utsname_machine": "x86_64", "os_name": "CentOS Linux", "os_id": "centos", "os_version_id": "7", "os_version": "7 (Core)", "backtrace": [ "(()+0xf5d0) [0x7f126217e5d0]", "(pread64()+0x33) [0x7f126217df63]", "(KernelDevice::read(unsigned long, unsigned long, ceph::buffer::v14_2_0::list*, IOContext*, bool)+0x12a) [0x555fa6a79c8a]", "(BlueFS::_read(BlueFS::FileReader*, BlueFS::FileReaderBuffer*, unsigned long, unsigned long, ceph::buffer::v14_2_0::list*, char*)+0x4c1) [0x555fa6a32501]", "(BlueRocksRandomAccessFile::Prefetch(unsigned long, unsigned long)+0x2a) [0x555fa6a5c2fa]", "(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter, rocksdb::Slice>::InitDataBlock()+0x2bd) [0x555fa704ce5d]", "(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter, rocksdb::Slice>::FindKeyForward()+0x8d) [0x555fa704cf5d]", "(()+0x1006d89) [0x555fa6fcad89]", "(rocksdb::MergingIterator::Next()+0x42) [0x555fa7060422]", "(rocksdb::DBIter::FindNextUserEntryInternal(bool, bool)+0xba2) [0x555fa6f595a2]", "(rocksdb::DBIter::Seek(rocksdb::Slice const&)+0x54c) [0x555fa6f5a6dc]", "(RocksDBStore::RocksDBWholeSpaceIteratorImpl::lower_bound(std::string const&, std::string const&)+0x44) [0x555fa69bebf4]", "(BlueStore::OmapIteratorImpl::OmapIteratorImpl(boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, std::shared_ptr<KeyValueDB::IteratorImpl>)+0x11d) [0x555fa68f603d]", "(BlueStore::get_omap_iterator(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, ghobject_t const&)+0x1ac) [0x555fa6932f9c]", "(PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0x1f1b) [0x555fa66696bb]", 
"(cls_cxx_map_get_vals(void*, std::string const&, std::string const&, unsigned long, std::map<std::string, ceph::buffer::v14_2_0::list, std::less<std::string>, std::allocator<std::pair<std::string const, ceph::buffer::v14_2_0::list> > >*, bool*)+0x1d5) [0x555fa676d365]", "(()+0xb1b2f) [0x7f1250fa6b2f]", "(()+0xbec69) [0x7f1250fb3c69]", "(ClassHandler::ClassMethod::exec(void*, ceph::buffer::v14_2_0::list&, ceph::buffer::v14_2_0::list&)+0x34) [0x555fa6548ef4]", "(PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0x149e) [0x555fa6668c3e]", "(PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x97) [0x555fa667bed7]", "(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x2f8) [0x555fa667c678]", "(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x47c6) [0x555fa6682426]", "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xbd4) [0x555fa6683174]", "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x1a9) [0x555fa64bc849]", "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x555fa675b0a2]", "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x9f4) [0x555fa64d9a74]", "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x433) [0x555fa6add613]", "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x555fa6ae06b0]", "(()+0x7dd5) [0x7f1262176dd5]", "(clone()+0x6d) [0x7f126103cead]" ]
Actions