1H - Inline data support » History » Version 1
Jessica Mack, 06/22/2015 04:46 AM
1 | 1 | Jessica Mack | h1. 1H - Inline data support |
---|---|---|---|
2 | |||
3 | h3. Live Pad |
||
4 | |||
5 | The live pad can be found here: "[pad]":http://pad.ceph.com/p/Inline_data_support_for_Ceph |
||
6 | |||
7 | h3. Summit Snapshot |
||
8 | |||
9 | Coding tasks |
||
10 | # Insert extended attribute of file for MDS to store data of small files |
||
11 | ## piggyback on the File cap bits, existing cap writeback mechanism |
||
12 | # MDS uninline file content when it goes into the MIX state |
||
13 | # add fields to MClientCaps |
||
14 | # Inline data is returned to the client via encode_inodestat() (used by lookup, readdir, stat, open, etc.) |
||
15 | # MDS would store inline data inside inode_t (bufferlist inline_data) |
||
16 | # Client (libcephfs, ceph-fuse) |
||
17 | # if size is small and we are flushing, flush inline to mds |
||
18 | ## prototype and refine protocol changes |
||
19 | # Linux kernel client |
||
20 | ## read side |
||
21 | ### copy into page cache from inode buffer from readpage() |
||
22 | ## write side |
||
23 | ### writepage[s]() .. |
||
24 | ### begin_page_writeback() ???? something like that... set the writeback bit, lock page |
||
25 | ### if (size is small and we want to inline) { |
||
26 | ### copy into the inode buffer |
||
27 | ### trigger mds cap flush |
||
28 | ### wait for flush |
||
29 | ### } else { |
||
30 | ### do the regular thing |
||
31 | ### } |
||
32 | ### end_page_writeback() |
||
33 | |||
34 | Documentation tasks |
||
35 | # Document the communication protocol |
||
36 | |||
37 | 1 Client side |
||
38 | |||
39 | 1.1 ceph_write_end() |
||
40 | <pre> |
||
41 | if (inode->status == INLINED) { |
||
42 | if (write_pos < PAGE_SIZE) { |
||
43 | write_page_to_inode(); |
||
44 | err = mark_inode_dirty(); |
||
45 | if (err == ESTATUS) // status has changed to NOTINLINING or NOTINLINED |
||
46 | write_page_to_osd(); |
||
47 | return; |
||
48 | } |
||
49 | if (write_pos > PAGE_SIZE) { |
||
50 | inode->status = NOTINLINING; |
||
51 | mark_inode_dirty(); // asynchronously tell MDS to change status to NOTINLINING |
||
52 | } |
||
53 | if (the interval [write_pos, write_pos + write_len] overlap with the interval [0, PAGE_SIZE]) { |
||
54 | inode->status = NOTINLINED; |
||
55 | mark_inode_dirty(); |
||
56 | } |
||
57 | } |
||
58 | if (inode->status == NOTINLINING) { |
||
59 | if (the interval [write_pos, write_pos + write_len] overlap with the interval [0, PAGE_SIZE]) { |
||
60 | inode->status = NOTINLINED; |
||
61 | mark_inode_dirty(); |
||
62 | } |
||
63 | } |
||
64 | </pre> |
||
65 | 1.2 write_page() |
||
66 | <pre> |
||
67 | if (page->index == 0 && inode->status == INLINED) { // for mmap(), it won't go through write_end |
||
68 | err = write_page_to_inode(); |
||
69 | mark_inode_dirty(); |
||
70 | } |
||
71 | write_page_to_osd(); |
||
72 | </pre> |
||
73 | 1.3 read_page() |
||
74 | <pre> |
||
75 | if (page->index == 0 && (inode->status == INLINED || inode->status == NOTINLINING)) { |
||
76 | err = copy_data_from_inode(); |
||
77 | if (err == ESTATUS) // status has changed to NOTINLINED |
||
78 | read_page_from_osd(); |
||
79 | return; |
||
80 | } |
||
81 | read_page_from_osd(); |
||
82 | </pre> |