Project

General

Profile

squeezed_log_for_ticket.txt

log snippet showing issue evolution with comments - Igor Fedotov, 03/14/2022 12:54 PM

Download (9.86 KB)

 
1
//Initial write which allocates [0x2d9f000~e000] and stages deferred write to it
2
2022-03-09T02:25:32.115+0100 7fbba968b700 15 bluestore(/var/lib/ceph/osd/ceph-4) _write 40.5s1_head 1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008:head# 0xa0000~e000
3
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_write 1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008
4
:head# 0xa0000~e000 - have 0xcd000 (839680) bytes fadvise_flags 0x0 alloc_hint 0x11 expected_object_size 0 expected_write_size 0
5
...
6
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_write_big reuse blob Blob(0x55e16f153dc0 blob([!~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0)) (0x0~e000)
7
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_write_big schedule write big: 0xa0000~e000 reuse Blob(0x55e16f153dc0 blob([!~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0))
8
...
9
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_alloc_write prealloc [0x2d9f000~e000]
10
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_alloc_write blob Blob(0x55e16f153dc0 blob([0x2d9f000~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0))
11
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore.blob(0x55e16f153dc0) get_ref 0x0~e000 Blob(0x55e16f153dc0 blob([0x2d9f000~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0))
12
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_alloc_write  lex 0xa0000~e000: 0x0~e000 Blob(0x55e16f153dc0 blob([0x2d9f000~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0))
13
...
14
2022-03-09T02:25:32.115+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _do_alloc_write deferring 0xe000 write via deferred
15
...
16
2022-03-09T02:25:32.131+0100 7fbbb7ea8700 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_queue txc 0x55e13ef26700 osr 0x55e139c23e40
17
2022-03-09T02:25:32.131+0100 7fbbb7ea8700 20 bluestore.DeferredBatch(0x55e139120c80) prepare_write seq 30 0x2d9f000~e000 crc 11db873c
18
2022-03-09T02:25:32.131+0100 7fbbb7ea8700 10 bluestore(/var/lib/ceph/osd/ceph-4) _txc_state_proc txc 0x55e13f04dc00 deferred_cleanup
19
...
20

    
21
//EC overwrite which triggers the extent in question overwrite and release 
22
2022-03-09T02:25:32.151+0100 7fbba968b700 15 bluestore(/var/lib/ceph/osd/ceph-4) _do_clone_range 40.5s1_head 1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008:head# -> 
23
    1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008:head#10bb 0xa0000~e000 ->  0xa0000~e000
24
...
25
2022-03-09T02:25:32.151+0100 7fbba968b700 20 bluestore.extentmap(0x55e16f1452d0) dup  src 0xa0000~10000: 0x0~10000 Blob(0x55e16f153dc0 blob([0x2d9f000~e000,0x480c66000~2000] csum crc32c/0x1000) use_tracker(0x10*0x1000 0x[1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]) SharedBlob(0x55e16f153e30 sbid 0x0))
26
...
27
2022-03-09T02:25:32.151+0100 7fbba968b700 15 bluestore(/var/lib/ceph/osd/ceph-4) _write 40.5s1_head 1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008:head# 0xa0000~e000
28
...
29
2022-03-09T02:25:32.203+0100 7fbba968b700 15 bluestore(/var/lib/ceph/osd/ceph-4) _remove 40.5s1_head 1#40:a585d490:::rbd_data.29.a2fb0bc44b89c7.0000000000000008:head#10bb onode 0x55e16f145680 txc 0x55e163e52e00
30
...
31
2022-03-09T02:25:32.203+0100 7fbba968b700 20 bluestore(/var/lib/ceph/osd/ceph-4) _txc_finalize_kv release 0x2d9f000~e000
32
2022-03-09T02:25:32.203+0100 7fbba968b700 10 freelist release 0x2d9f000~e000
33
2022-03-09T02:25:32.203+0100 7fbba968b700 20 freelist _xor first_key 0x2d80000 last_key 0x2d80000
34

    
35

    
36
//deferred_try_submit call from a mempool thread to commit a bunch of deferred writes including the one in question
37
//Notably this is already incorrect due to the relevant release above
38
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bluestore(/var/lib/ceph/osd/ceph-4) deferred_try_submit 6 osrs, 19 txcs
39
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_submit_unlock   seq 30 0x2d9f000~e000
40
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_submit_unlock write 0x2d9f000~e000 crc 11db873c
41
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bdev(0x55e138d9a400 /var/lib/ceph/osd/ceph-4/block) aio_write 0x2d9f000~e000 (direct)
42
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bdev(0x55e138d9a400 /var/lib/ceph/osd/ceph-4/block) aio_write rebuilding buffer to be aligned
43
2022-03-09T02:25:32.779+0100 7fbbb76a7700 20 bdev(0x55e138d9a400 /var/lib/ceph/osd/ceph-4/block) _aio_log_start 0x2d9f000~e000
44
2022-03-09T02:25:32.779+0100 7fbbb76a7700  5 bdev(0x55e138d9a400 /var/lib/ceph/osd/ceph-4/block) aio_write 0x2d9f000~e000 aio 0x55e157c36310
45

    
46
//OSD shutdown, there are no completed deferred write cleanup. Apparently due to the lack of additional regualr BlueStore ops which would trigger that in kv_sync_thread.
47
// Hence deferred write ops are still present in DB and are to be commited on OSD restart
48
2022-03-09T02:25:46.719+0100 7fbbc0962700 -1 received  signal: Terminated from /sbin/init  (PID: 1) UID: 0
49
2022-03-09T02:25:46.719+0100 7fbbc0962700 -1 osd.4 6164 *** Got signal Terminated ***
50
2022-03-09T02:25:46.719+0100 7fbbc0962700 -1 osd.4 6164 *** Immediate shutdown (osd_fast_shutdown=true) ***
51

    
52
//OSD is restarted
53
2022-03-10T16:55:38.775+0100 7fa8e6ab4f00  0 ceph version 16.2.6 (1a6b9a05546f335eeeddb460fdc89caadf80ac7a) pacific (stable), process ceph-osd, pid 1681
54

    
55
//BlueFS starts using an extent [1:0x2da0000~10000] which partially overlaps with the original extent [0x2d9f000~e000] still attached to a persistent deferred write op
56
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 10 bluefs _flush_range 0x55669e45e6c0 pos 0x0 0x0~858 to file(ino 2186 size 0x0 mtime
57
2022-03-10T16:56:19.070476+0100 allocated 0 extents [])
58
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 10 bluefs _allocate len 0x858 from 1
59
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 10 HybridAllocator allocate want 0x10000 unit 0x10000 max_alloc_size 0x10000 hint 0x0
60
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 AvlAllocator _allocate first fit=47841280 size=65536
61
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bluefs _flush_range file now, unflushed file(ino 2186 size 0x858 mtime 2022-03-10T16:56:19.070476+0100 allocated 10000 extents [1:0x2da0000~10000])
62
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bluefs _flush_range in 1:0x2da0000~10000 x_off 0x0
63
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bluefs flush_buffer caching tail of 0x858 and padding block with 0x7a8 buffer.length() 0
64
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bdev(0x55669f18b400 /var/lib/ceph/osd/ceph-4/block) aio_write 0x2da0000~1000 (buffered)
65
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bdev(0x55669f18b400 /var/lib/ceph/osd/ceph-4/block) _aio_log_start 0x2da0000~1000
66
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00  5 bdev(0x55669f18b400 /var/lib/ceph/osd/ceph-4/block) _sync_write 0x2da0000~1000 (buffered)
67
2022-03-10T16:56:19.064+0100 7fa8e6ab4f00 20 bdev(0x55669f18b400 /var/lib/ceph/osd/ceph-4/block) _aio_log_finish 1 0x2da0000~1000
68

    
69
//another deferred write ops commit on OSD restart,
70
// this would overwrite BlueFS data written above
71
2022-03-10T16:56:19.272+0100 7fa8e6ab4f00 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_replay replay 0x000000000000001E
72
2022-03-10T16:56:19.272+0100 7fa8e6ab4f00 20 bluestore(/var/lib/ceph/osd/ceph-4) _txc_create osr 0x5566c5f2ef20 = 0x55669e4c1c0
73
0 seq 2
74
2022-03-10T16:56:19.272+0100 7fa8e6ab4f00 10 bluestore(/var/lib/ceph/osd/ceph-4) _txc_state_proc txc 0x55669e4c1c00 kv_done
75
2022-03-10T16:56:19.272+0100 7fa8e6ab4f00 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_queue txc 0x55669e4c1c00 osr 0x5566c
76
5f2ef20
77
2022-03-10T16:56:19.272+0100 7fa8e6ab4f00 20 bluestore.DeferredBatch(0x5566a0914780) prepare_write seq 30 0x2d9f000~e000 crc 11db873c
78
...
79
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_submit_unlock   seq 30 0x2d9f000~e000
80
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00 20 bluestore(/var/lib/ceph/osd/ceph-4) _deferred_submit_unlock write 0x2d9f000~e000 crc 11db873c
81
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00 20 bdev(0x55669e4df400 /var/lib/ceph/osd/ceph-4/block) aio_write 0x2d9f000~e000 (direct)
82
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00 20 bdev(0x55669e4df400 /var/lib/ceph/osd/ceph-4/block) aio_write rebuilding buffer to be aligned
83
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00 20 bdev(0x55669e4df400 /var/lib/ceph/osd/ceph-4/block) _aio_log_start 0x2d9f000~e000
84
2022-03-10T16:56:19.284+0100 7fa8e6ab4f00  5 bdev(0x55669e4df400 /var/lib/ceph/osd/ceph-4/block) aio_write 0x2d9f000~e000 aio 0x5566a0d62c10
85

    
86

    
87
//BlueFS is reading from the invalidated extent
88
2022-03-10T17:18:49.849+0100 7f6f7fac6700 10 bluefs open_for_read db/002193.sst (random)
89
2022-03-10T17:18:49.849+0100 7f6f7fac6700 10 bluefs open_for_read h 0x5640531b3000 on file(ino 2186 size 0x858 mtime 2022-03-10
90
T16:56:19.071331+0100 allocated 10000 extents [1:0x2da0000~10000])
91
2022-03-10T17:18:49.849+0100 7f6f7fac6700 20 bluefs _read fetching 0x0~1000 of 1:0x2da0000~10000
92
2022-03-10T17:18:49.849+0100 7f6f7fac6700  5 bdev(0x564051564800 /var/lib/ceph/osd/ceph-4/block) read 0x2da0000~1000 (buffered)
93
2022-03-10T17:18:49.849+0100 7f6f7fac6700 20 bdev(0x564051564800 /var/lib/ceph/osd/ceph-4/block) _aio_log_start 0x2da0000~1000
94
...
95

    
96
// And the final
97
2022-03-10T17:18:49.917+0100 7f6f8bdc6f00 -1 rocksdb: Corruption: Bad table magic number: expected 9863518390377041911, found 0 in db/002193.sst