Bug #1973
osd: segfault in ReplicatedPG::remove_object_with_snap_hardlinks
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
/var/lib/teuthworker/archive/nightly_coverage_2012-01-23-b/8775
2012-01-23 14:26:52.404921 7eff6c83f700 log [ERR] : 0.15 missing primary copy of eb0288b5/sepia4121727-175/9, unfound *** Caught signal (Segmentation fault) ** in thread 7eff6d040700 ceph version 0.40-213-g7ce544e (commit:7ce544e640d45e901ef67e8268c963c958a66eff) 1: /tmp/cephtest/binary/usr/local/bin/ceph-osd() [0x6c1de4] 2: (()+0xfb40) [0x7eff7c5d9b40] 3: (ReplicatedPG::remove_object_with_snap_hardlinks(ObjectStore::Transaction&, hobject_t const&)+0x353) [0x4c4093] 4: (ReplicatedPG::sub_op_push(MOSDSubOp*)+0x1091) [0x4e47e1] 5: (ReplicatedPG::do_sub_op(MOSDSubOp*)+0x2c3) [0x507863] 6: (OSD::dequeue_op(PG*)+0x1e4) [0x59aeb4] 7: (OSD::OpWQ::_process(PG*)+0x15) [0x5f70d5] 8: (ThreadPool::WorkQueue<PG>::_void_process(void*)+0x12) [0x5cb4a2] 9: (ThreadPool::worker()+0x7e3) [0x6abc43] 10: (ThreadPool::WorkThread::entry()+0x15) [0x5d4a35] 11: (Thread::_entry_func(void*)+0x12) [0x637042] 12: (()+0x7971) [0x7eff7c5d1971] 13: (clone()+0x6d) [0x7eff7ac5c92d]
(gdb) bt #0 0x00007eff7c5d9a0b in raise (sig=<value optimized out>) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:42 #1 0x00000000006c191b in reraise_fatal (signum=21458) at global/signal_handler.cc:59 #2 0x00000000006c210c in handle_fatal_signal (signum=<value optimized out>) at global/signal_handler.cc:109 #3 <signal handler called> #4 0x00000000004c4093 in ~coll_t (this=0x211c400, t=..., soid=...) at osd/osd_types.h:244 #5 ReplicatedPG::remove_object_with_snap_hardlinks (this=0x211c400, t=..., soid=...) at osd/ReplicatedPG.cc:5690 #6 0x00000000004e47e1 in ReplicatedPG::sub_op_push (this=<value optimized out>, op=<value optimized out>) at osd/ReplicatedPG.cc:4599 #7 0x0000000000507863 in ReplicatedPG::do_sub_op (this=0x211c400, op=0x20fd600) at osd/ReplicatedPG.cc:878 #8 0x000000000059aeb4 in OSD::dequeue_op (this=0x2070000, pg=0x211c400) at osd/OSD.cc:5585 #9 0x00000000005f70d5 in OSD::OpWQ::_process(PG*) () #10 0x00000000005cb4a2 in ThreadPool::WorkQueue<PG>::_void_process(void*) () #11 0x00000000006abc43 in ThreadPool::worker (this=0x2070418) at common/WorkQueue.cc:54 #12 0x00000000005d4a35 in ThreadPool::WorkThread::entry() () #13 0x0000000000637042 in Thread::_entry_func (arg=0x7eff6d03ee50) at common/Thread.cc:41 #14 0x00007eff7c5d1971 in start_thread (arg=<value optimized out>) at pthread_create.c:304 #15 0x00007eff7ac5c92d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112 #16 0x0000000000000000 in ?? () (gdb) f 5 #5 ReplicatedPG::remove_object_with_snap_hardlinks (this=0x211c400, t=..., soid=...) at osd/ReplicatedPG.cc:5690 5690 osd/ReplicatedPG.cc: No such file or directory. 
in osd/ReplicatedPG.cc (gdb) p oi.snaps $1 = {<std::_Vector_base<snapid_t, std::allocator<snapid_t> >> = {_M_impl = {<std::allocator<snapid_t>> = {<__gnu_cxx::new_allocator<snapid_t>> = {<No data fields>}, <No data fields>}, _M_start = 0x0, _M_finish = 0x0, _M_end_of_storage = 0x0}}, <No data fields>} (gdb) p soid.snap $2 = {val = 9} (gdb) p oi $3 = {soid = {oid = {name = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x297ea98 "sepia4121727-175"}}}, snap = {val = 18446744073709551614}, hash = 3942811829, max = false, key = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0xb27098 ""}}}, oloc = {pool = 0, preferred = -1, key = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0xb27098 ""}}}, category = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0xb27098 ""}}, version = {version = 34, epoch = 52, __pad = 0}, prior_version = {version = 14, epoch = 13, __pad = 0}, user_version = {version = 14, epoch = 13, __pad = 0}, last_reqid = {name = {_type = 8 '\b', _num = 4106, static TYPE_MON = 1, static TYPE_MDS = 2, static TYPE_OSD = 4, static TYPE_CLIENT = 8, static NEW = -1}, tid = 467, inc = 0}, size = 1129395, mtime = {tv = {tv_sec = 1327357382, tv_nsec = 507959000}}, lost = false, wrlock_by = {name = {_type = 0 '\000', _num = 0, static TYPE_MON = 1, static TYPE_MDS = 2, static TYPE_OSD = 4, static TYPE_CLIENT = 8, static NEW = -1}, tid = 0, inc = 0}, snaps = {<std::_Vector_base<snapid_t, std::allocator<snapid_t> >> = {_M_impl = {<std::allocator<snapid_t>> = {<__gnu_cxx::new_allocator<snapid_t>> = {<No data fields>}, <No data fields>}, _M_start = 0x0, 
_M_finish = 0x0, _M_end_of_storage = 0x0}}, <No data fields>}, truncate_seq = 0, truncate_size = 0, watchers = {_M_t = { _M_impl = {<std::allocator<std::_Rb_tree_node<std::pair<entity_name_t const, watch_info_t> > >> = {<__gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<entity_name_t const, watch_info_t> > >> = {<No data fields>}, <No data fields>}, _M_key_compare = {<std::binary_function<entity_name_t, entity_name_t, bool>> = {<No data fields>}, <No data fields>}, _M_header = {_M_color = std::_S_red, _M_parent = 0x0, _M_left = 0x7eff6d03f118, _M_right = 0x7eff6d03f118}, _M_node_count = 0}}}} (gdb) p soid $5 = (const hobject_t &) @0x20fd7a0: {oid = {name = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x252fe28 "sepia4121727-175"}}}, snap = {val = 9}, hash = 3942811829, max = false, key = {static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0xb27098 ""}}}
kernel: sha1: 39fe85deecf519da4da0dba31cc8e51951fb1a2f nuke-on-error: true overrides: ceph: btrfs: 1 coverage: true log-whitelist: - clocks not synchronized sha1: 7ce544e640d45e901ef67e8268c963c958a66eff roles: - - mon.0 - mds.a - osd.0 - osd.1 - osd.2 - - mon.1 - mon.2 - client.0 - osd.3 - osd.4 - osd.5 tasks: - chef: null - ceph: log-whitelist: - wrongly marked me down or wrong addr - thrashosds: null - rados: clients: - client.0 objects: 500 op_weights: delete: 50 read: 100 snap_create: 50 snap_remove: 50 snap_rollback: 50 write: 100 ops: 4000
History
#1 Updated by Sage Weil almost 12 years ago
- Description updated (diff)
#2 Updated by Sage Weil almost 12 years ago
This is the object_info_t decoded from the _9 snap clone object:
fatty:src 09:01 PM $ ./ceph-dencoder type object_info_t import /tmp/a decode dump_json { "oid": "eb0288b5\/sepia4121727-175\/head", "locator": "@0", "category": "", "version": "52'34", "prior_version": "13'14", "last_reqid": "client.4106.0:467", "size": 1129395, "mtime": "2012-01-23 14:23:02.507959", "lost": 0, "wrlock_by": "unknown.0.0:0", "snaps": [], "truncate_seq": 0, "truncate_size": 0, "watchers": {}}
And this is the object_info_t decoded from the _head object:
fatty:src 09:02 PM $ ./ceph-dencoder type object_info_t import /tmp/b decode dump_json { "oid": "eb0288b5\/sepia4121727-175\/head", "locator": "@0", "category": "", "version": "52'38", "prior_version": "52'37", "last_reqid": "client.4106.0:1382", "size": 3885467, "mtime": "2012-01-23 14:25:43.788203", "lost": 0, "wrlock_by": "unknown.0.0:0", "snaps": [], "truncate_seq": 0, "truncate_size": 0, "watchers": {}}
#3 Updated by Sage Weil almost 12 years ago
- Priority changed from High to Normal
#4 Updated by Sage Weil almost 12 years ago
- Status changed from New to Can't reproduce
Let's chalk this up to the bad object_info_t: the _9 snap clone's attr decodes with a /head oid and an empty snaps vector (matching the gdb dump, where oi.snaps is empty while soid.snap = 9), so remove_object_with_snap_hardlinks dereferenced an empty snaps list.