Bug #54509
closed — FAILED ceph_assert due to issue manifest API to the original object
0%
Description
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x152) [0x55f1f3750606]
2: ceph-osd(+0x5bf827) [0x55f1f3750827]
3: (PrimaryLogPG::get_object_context(hobject_t const&, bool, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > const*)+0x22f) [0x55f1f39670df]
4: (PrimaryLogPG::get_adjacent_clones(std::shared_ptr<ObjectContext>, std::shared_ptr<ObjectContext>&, std::shared_ptr<ObjectContext>&)+0xc5) [0x55f1f3968845]
5: (PrimaryLogPG::inc_refcount_by_set(PrimaryLogPG::OpContext*, object_manifest_t&, OSDOp&)+0xd3) [0x55f1f396c4d3]
6: (PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0xe634) [0x55f1f39b4234]
7: (PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x177) [0x55f1f39babd7]
8: (PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x31d) [0x55f1f39bccbd]
9: (PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x2dbb) [0x55f1f39c674b]
10: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd1c) [0x55f1f39cd93c]
11: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x55f1f3856c99]
12: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x55f1f3ab9a18]
13: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xc28) [0x55f1f3873788]
14: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x55f1f3f105c4]
15: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x55f1f3f11964]
16: (Thread::_entry_func(void*)+0xd) [0x55f1f3ef768d]
17: /lib64/libpthread.so.0(+0x814a) [0x7ffaa6d8a14a]
18: clone()
/a/yuriw-2022-02-21_15:40:41-rados-wip-yuri4-testing-2022-02-18-0800-distro-default-smithi/6698453
Updated by Myoungwon Oh about 2 years ago
Updated by Laura Flores about 2 years ago
/a/yuriw-2022-04-22_21:06:04-rados-wip-yuri3-testing-2022-04-22-0534-quincy-distro-default-smithi/6802065/remote/smithi029/crash/2022-04-23T04:49:22.650441Z_3a8f9ca0-e2a7-4be0-82b8-f1b205b03176
ceph-17.2.0-58-ge2161634/src/osd/PrimaryLogPG.cc: In function 'ObjectContextRef PrimaryLogPG::get_object_context(const hobject_t&, bool, const std::map<std::__cxx11::basic_string<char>, ceph::buffer::v15_2_0::list, std::less<void> >*)' thread 33f26700 time 2022-04-23T04:49:22.488994+0000
/home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.2.0-58-ge2161634/rpm/el8/BUILD/ceph-17.2.0-58-ge2161634/src/osd/PrimaryLogPG.cc: 11820: FAILED ceph_assert(attrs || !recovery_state.get_pg_log().get_missing().is_missing(soid) || (it_objects != recovery_state.get_pg_log().get_log().objects.end() && it_objects->second->op == pg_log_entry_t::LOST_REVERT))
ceph version 17.2.0-58-ge2161634 (e21616343a03209745d1ef7ffb6304159addf4fd) quincy (stable)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x152) [0x6df678]
2: ceph-osd(+0x5d7899) [0x6df899]
3: (PrimaryLogPG::get_object_context(hobject_t const&, bool, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > const*)+0x22f) [0x90264f]
4: (PrimaryLogPG::get_manifest_ref_count(std::shared_ptr<ObjectContext>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, boost::intrusive_ptr<OpRequest>)+0x355) [0x9240b5]
5: (cls_get_manifest_ref_count(void*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x76) [0x796a16]
6: /usr/lib64/rados-classes/libcls_cas.so(+0xb914) [0x1cd2b914]
7: ceph-osd(+0x77ab2c) [0x882b2c]
8: (ClassHandler::ClassMethod::exec(void*, ceph::buffer::v15_2_0::list&, ceph::buffer::v15_2_0::list&)+0x5e) [0x88335e]
9: (PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0x17a2) [0x943542]
10: (PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x177) [0x956927]
11: (PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x338) [0x95a5a8]
12: (PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x395f) [0x96318f]
13: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xe2e) [0x969dbe]
14: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x332) [0x7ea942]
15: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x6f) [0xaaf17f]
16: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xaf8) [0x8092c8]
17: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0xf0ef44]
18: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0xf102e4]
19: /lib64/libpthread.so.0(+0x81cf) [0x68231cf]
20: clone()
Also in /a/lflores-2022-04-26_15:57:44-rados-wip-yuri3-testing-2022-04-22-0534-quincy-distro-default-smithi/6806798 on the same branch.
Updated by Laura Flores about 2 years ago
- Status changed from Fix Under Review to Pending Backport
- Backport set to quincy
Updated by Backport Bot about 2 years ago
- Copied to Backport #55439: quincy: FAILED ceph_assert due to issue manifest API to the original object added
Updated by Neha Ojha about 2 years ago
Myoungwon Oh: Seeing this in pacific as well, can you confirm if it is the same issue?
/a/yuriw-2022-04-30_17:01:51-rados-wip-yuri4-testing-2022-04-29-1830-pacific-distro-default-smithi/6816223
2022-05-01T03:52:23.606 INFO:tasks.ceph.osd.7.smithi099.stderr:2022-05-01T03:52:23.584+0000 39793700 -1 /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.7-981-g5fb400bd/rpm/el8/BUILD/ceph-16.2.7-981-g5fb400bd/src/osd/PrimaryLogPG.cc: In function 'ObjectContextRef PrimaryLogPG::get_object_context(const hobject_t&, bool, const std::map<std::__cxx11::basic_string<char>, ceph::buffer::v15_2_0::list>*)' thread 39793700 time 2022-05-01T03:52:23.461746+0000 2022-05-01T03:52:23.606 INFO:tasks.ceph.osd.7.smithi099.stderr:/home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.7-981-g5fb400bd/rpm/el8/BUILD/ceph-16.2.7-981-g5fb400bd/src/osd/PrimaryLogPG.cc: 11485: FAILED ceph_assert(attrs || !recovery_state.get_pg_log().get_missing().is_missing(soid) || (it_objects != recovery_state.get_pg_log().get_log().objects.end() && it_objects->second->op == pg_log_entry_t::LOST_REVERT)) 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: ceph version 16.2.7-981-g5fb400bd (5fb400bd707676c39bd35235907ae7d994946974) pacific (stable) 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x158) [0x687720] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 2: ceph-osd(+0x57f93a) [0x68793a] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 3: (PrimaryLogPG::get_object_context(hobject_t const&, bool, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> > const, ceph::buffer::v15_2_0::list> > > const*)+0x6ff) [0x8a568f] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 4: (PrimaryLogPG::get_adjacent_clones(std::shared_ptr<ObjectContext>, std::shared_ptr<ObjectContext>&, std::shared_ptr<ObjectContext>&)+0xc5) [0x8a6995] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 5: (PrimaryLogPG::get_manifest_ref_count(std::shared_ptr<ObjectContext>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, boost::intrusive_ptr<OpRequest>)+0x433) [0x8c0293] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 6: (cls_get_manifest_ref_count(void*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x76) [0x73e006] 2022-05-01T03:52:23.607 INFO:tasks.ceph.osd.7.smithi099.stderr: 7: /usr/lib64/rados-classes/libcls_cas.so(+0xbdc4) [0x25e87dc4] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 8: ceph-osd(+0x72155c) [0x82955c] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 9: (ClassHandler::ClassMethod::exec(void*, ceph::buffer::v15_2_0::list&, ceph::buffer::v15_2_0::list&)+0x5e) [0x8298ce] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 10: (PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0x17b2) [0x8e3972] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 11: (PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x177) [0x8f7217] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 12: (PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x326) [0x8f9306] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 13: (PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x2de5) [0x902cc5] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 14: (PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd1c) [0x90a00c] 2022-05-01T03:52:23.608 
INFO:tasks.ceph.osd.7.smithi099.stderr: 15: (OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x790fa9] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 16: (ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x9f00b8] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 17: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xc28) [0x7ae4a8] 2022-05-01T03:52:23.608 INFO:tasks.ceph.osd.7.smithi099.stderr: 18: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0xe2b4c4] 2022-05-01T03:52:23.609 INFO:tasks.ceph.osd.7.smithi099.stderr: 19: (ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0xe2e3a4] 2022-05-01T03:52:23.609 INFO:tasks.ceph.osd.7.smithi099.stderr: 20: /lib64/libpthread.so.0(+0x81cf) [0xc8381cf] 2022-05-01T03:52:23.609 INFO:tasks.ceph.osd.7.smithi099.stderr: 21: clone()
Updated by Myoungwon Oh about 2 years ago
I think this is the same issue as https://tracker.ceph.com/issues/50806.
This issue was already fixed, but not backported yet (pending backport).
Updated by Myoungwon Oh about 2 years ago
Updated by Neha Ojha about 2 years ago
Myoungwon Oh wrote:
Thanks for looking into it and creating the backport!
Updated by Ronen Friedman almost 2 years ago
@Myoungwon Oh - can you take a look at the crash report below?
I'm seeing something that looks disturbingly similar:
{
"crash_id": "2022-07-06T07:44:33.131281Z_a7155c84-92d1-45f0-b692-840bec5c29fa",
"timestamp": "2022-07-06T07:44:33.131281Z",
"process_name": "ceph-osd",
"entity_name": "osd.6",
"ceph_version": "17.0.0-13379-g9b018aeb",
"utsname_hostname": "smithi084",
"utsname_sysname": "Linux",
"utsname_release": "5.4.0-121-generic",
"utsname_version": "#137-Ubuntu SMP Wed Jun 15 13:33:07 UTC 2022",
"utsname_machine": "x86_64",
"os_name": "Ubuntu",
"os_id": "ubuntu",
"os_version_id": "20.04",
"os_version": "20.04.4 LTS (Focal Fossa)",
"assert_condition": "attrs || !recovery_state.get_pg_log().get_missing().is_missing(soid) || (it_objects != recovery_state.get_pg_log().get_log().objects.end() && it_objects->second->op == pg_log_entry_t::LOST_REVERT)",
"assert_func": "ObjectContextRef PrimaryLogPG::get_object_context(const hobject_t&, bool, const std::map<std::__cxx11::basic_string<char>, ceph::buffer::v15_2_0::list, std::less<void> >*)",
"assert_file": "/build/ceph-17.0.0-13379-g9b018aeb/src/osd/PrimaryLogPG.cc",
"assert_line": 11804,
"assert_thread_name": "tp_osd_tp",
"assert_msg": "/build/ceph-17.0.0-13379-g9b018aeb/src/osd/PrimaryLogPG.cc: In function 'ObjectContextRef PrimaryLogPG::get_object_context(const hobject_t&, bool, const std::map<std::__cxx11::basic_string<char>, ceph::buffer::v15_2_0::list, std::less<void> >*)' thread 7fc7bcf69700 time 20\
22-07-06T07:44:33.111747+0000\n/build/ceph-17.0.0-13379-g9b018aeb/src/osd/PrimaryLogPG.cc: 11804: FAILED ceph_assert(attrs || !recovery_state.get_pg_log().get_missing().is_missing(soid) || (it_objects != recovery_state.get_pg_log().get_log().objects.end() && it_objects->second->op == pg_log_\
entry_t::LOST_REVERT))\n",
"backtrace": [
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x14420) [0x7fc7db64b420]",
"gsignal()",
"abort()",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1b7) [0x561566b6af2a]",
"ceph-osd(+0xc6c0d4) [0x561566b6b0d4]",
"(PrimaryLogPG::get_object_context(hobject_t const&, bool, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, ceph::buffer::v15_2_0::list, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::al\
locator<char> > const, ceph::buffer::v15_2_0::list> > > const*)+0x247) [0x561566d90097]",
"(PrimaryLogPG::get_adjacent_clones(std::shared_ptr<ObjectContext>, std::shared_ptr<ObjectContext>&, std::shared_ptr<ObjectContext>&)+0x1a2) [0x561566d92332]",
"(PrimaryLogPG::get_manifest_ref_count(std::shared_ptr<ObjectContext>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, boost::intrusive_ptr<OpRequest>)+0x5cd) [0x561566db11ed]",
"(cls_get_manifest_ref_count(void*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x76) [0x561566beb1f6]",
"/usr/lib/rados-classes/libcls_cas.so(+0xb069) [0x7fc7cdbef069]",
"ceph-osd(+0xe0709d) [0x561566d0609d]",
"(ClassHandler::ClassMethod::exec(void*, ceph::buffer::v15_2_0::list&, ceph::buffer::v15_2_0::list&)+0x67) [0x561566d063a7]",
"(PrimaryLogPG::do_osd_ops(PrimaryLogPG::OpContext*, std::vector<OSDOp, std::allocator<OSDOp> >&)+0xa89) [0x561566dd95b9]",
"(PrimaryLogPG::prepare_transaction(PrimaryLogPG::OpContext*)+0x9f) [0x561566dee30f]",
"(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x393) [0x561566df3013]",
"(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x35f3) [0x561566df7f23]",
"(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xe61) [0x561566e03161]",
"(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x1c0) [0x561566c3c390]",
"(ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x66) [0x561566f73ed6]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x8d2) [0x561566c68a12]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x403) [0x561567410723]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x561567413764]",
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x8609) [0x7fc7db63f609]",
"clone()"
]
}
Updated by Radoslaw Zarzynski about 1 year ago
- Status changed from Pending Backport to Resolved