Bug #64390
Updated by Dhairya Parmar 3 months ago
test case: <pre><code class="cpp">
TEST_F(TestClient, LlreadvLlwritevDataPoolFull) {
  /* Test performing async I/O after filling the fs and make sure it handles
     the read/write gracefully */
  int mypid = getpid();
  char filename[256];

  client->unmount();
  TearDown();
  SetUp();

  sprintf(filename, "test_llreadvllwritevdatapoolfullfile%u", mypid);

  Inode *root, *file;
  root = client->get_root();
  ASSERT_NE(root, (Inode *)NULL);

  Fh *fh;
  struct ceph_statx stx;
  ASSERT_EQ(0, client->ll_createx(root, filename, 0666,
                                  O_RDWR | O_CREAT | O_TRUNC,
                                  &file, &fh, &stx, 0, 0, myperm));

  struct statvfs stbuf;
  int64_t rc;
  rc = client->ll_statfs(root, &stbuf, myperm);
  ASSERT_EQ(rc, 0);
  int64_t fs_available_space = stbuf.f_bfree * stbuf.f_bsize;
  // ASSERT_GT(fs_available_space, 0);

  const int64_t BUFSIZE = 1024 * 1024 * 1024;
  int64_t bytes_written = 0, offset = 0;
  char* buf = new char[BUFSIZE];
  char* small_buf = NULL;
  memset(buf, 0xCC, BUFSIZE);

  while(fs_available_space) {
    if (fs_available_space >= BUFSIZE) {
      bytes_written = client->ll_write(fh, offset, BUFSIZE, buf);
      ASSERT_GT(bytes_written, 0);
      offset += BUFSIZE;
      fs_available_space -= BUFSIZE;
    } else {
      small_buf = new char[fs_available_space];
      memset(small_buf, 0xDD, fs_available_space);
      bytes_written = client->ll_write(fh, offset, fs_available_space, small_buf);
      ASSERT_GT(bytes_written, 0);
      break;
    }
  }

  std::unique_ptr<C_SaferCond> writefinish = nullptr;
  std::unique_ptr<C_SaferCond> readfinish = nullptr;
  writefinish.reset(new C_SaferCond("test-nonblocking-writefinish-datapool-full"));
  readfinish.reset(new C_SaferCond("test-nonblocking-readfinish-datapool-full"));

  char* out_buf_0 = new char[BUFSIZE];
  memset(out_buf_0, 0xDD, BUFSIZE);
  char* out_buf_1 = new char[BUFSIZE];
  memset(out_buf_1, 0xFF, BUFSIZE);
  char* out_buf_2 = new char[BUFSIZE];
  memset(out_buf_2, 0xFF, BUFSIZE);
  char* out_buf_3 = new char[BUFSIZE];
  memset(out_buf_3, 0xFF, BUFSIZE);
  char* out_buf_4 = new char[BUFSIZE];
  memset(out_buf_4, 0xFF, BUFSIZE);
  char* out_buf_5 = new char[BUFSIZE];
  memset(out_buf_5, 0xFF, BUFSIZE);

  struct iovec iov_out[6] = {
    {out_buf_0, BUFSIZE},
    {out_buf_1, BUFSIZE},
    {out_buf_2, BUFSIZE},
    {out_buf_3, BUFSIZE},
    {out_buf_4, BUFSIZE},
    {out_buf_5, BUFSIZE},
  };

  bufferlist bl;
  rc = client->ll_preadv_pwritev(fh, iov_out, 6, 0, true, writefinish.get(), nullptr);
  ASSERT_EQ(rc, 0);
  bytes_written = writefinish->wait();
  ASSERT_EQ(bytes_written, -CEPHFS_ENOSPC);

  client->ll_release(fh);
  ASSERT_EQ(0, client->ll_unlink(root, filename, myperm));

  delete[] buf;
  delete[] small_buf;
  delete[] out_buf_0;
  delete[] out_buf_1;
  delete[] out_buf_2;
  delete[] out_buf_3;
  delete[] out_buf_4;
  delete[] out_buf_5;
}
</code></pre>

Firstly, the assertion fails after the async write call:

<pre><code class="text">
2024-02-12T19:09:43.795+0530 7f84bac686c0 19 client.4304 C_Write_Finisher::try_complete this 0x5594b702bcc0 onuninlinefinished 1 iofinished 1 iofinished_r 2147483647 fsync_finished 1
2024-02-12T19:09:43.795+0530 7f84bac686c0 19 client.4304 complete with iofinished_r 2147483647
/home/dparmar/CephRepoForRunningTestsLocally/ceph/src/test/client/nonblocking.cc:800: Failure
Expected equality of these values:
  bytes_written
    Which is: 2147483647
  -28
</code></pre>

I expected the API to return ENOSPC but it returned 2GiB, i.e.
33% data was written (shouldn't happen since I had filled up all the available space in the first place). We do get the ENOSPC error when releasing the file handle after this:

<pre><code class="text">
2024-02-12T19:09:43.795+0530 7f84bf65c9c0 1 client.4304 _release_fh 0x5594b6f277f0 on inode 0x10000000000.head(faked_ino=0 nref=8 ll_ref=1 cap_refs={4=0,1024=0,4096=0,8192=0} open={3=0} mode=100666 size=106287857664/110582824960 nlink=1 btime=2024-02-12T18:42:52.646736+0530 mtime=2024-02-12T19:09:43.796040+0530 ctime=2024-02-12T19:09:43.796040+0530 change_attr=100 caps=p(0=p) flushing_caps=Fw objectset[0x10000000000 ts 0/0 objects 1000 dirty_or_tx 0] parents=0x1.head["test_llreadvllwritevdatapoolfullfile1269955"] 0x7f84900088e0)
caught async_err = (28) No space left on device
</code></pre>

and then this — the call is stalled:

<pre><code class="text">
2024-02-12T19:09:43.976+0530 7f84977fe6c0 20 client.4304 upkeep thread waiting interval 1.000000000s
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 1
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 2
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 3
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 10 client.4304 unmounting: trim pass, size was 0+2
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 20 client.4304 trim_cache size 0 max 16384
2024-02-12T19:09:44.614+0530 7f84b1ffb6c0 10 client.4304 unmounting: trim pass, size still 0+2
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 tick
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 collect_and_send_metrics
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 collect_and_send_global_metrics
2024-02-12T19:09:44.977+0530 7f84977fe6c0 10 client.4304 _put_inode on 0x1.head(faked_ino=0 nref=2 ll_ref=0 cap_refs={1024=0} open={} mode=40755 size=0/0 nlink=1
btime=2024-02-12T18:41:58.976066+0530 mtime=2024-02-12T18:42:52.646736+0530 ctime=2024-02-12T18:42:52.646736+0530 change_attr=1 caps=pAsLsXs(0=pAsLsXs) has_dir_layout 0x7f84900081e0) n = 1
2024-02-12T19:09:44.977+0530 7f84977fe6c0 10 client.4304 remove_cap mds.0 on 0x1.head(faked_ino=0 nref=1 ll_ref=0 cap_refs={1024=0} open={} mode=40755 size=0/0 nlink=1 btime=2024-02-12T18:41:58.976066+0530 mtime=2024-02-12T18:42:52.646736+0530 ctime=2024-02-12T18:42:52.646736+0530 change_attr=1 caps=pAsLsXs(0=pAsLsXs) has_dir_layout 0x7f84900081e0)
2024-02-12T19:09:44.977+0530 7f84977fe6c0 15 client.4304 remove_cap last one, closing snaprealm 0x7f84900080f0
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 put_snap_realm 0x1 0x7f84900080f0 2 -> 1
2024-02-12T19:09:44.977+0530 7f84977fe6c0 10 client.4304 _put_inode deleting 0x1.head(faked_ino=0 nref=1 ll_ref=0 cap_refs={1024=0} open={} mode=40755 size=0/0 nlink=1 btime=2024-02-12T18:41:58.976066+0530 mtime=2024-02-12T18:42:52.646736+0530 ctime=2024-02-12T18:42:52.646736+0530 change_attr=1 caps=- has_dir_layout 0x7f84900081e0)
2024-02-12T19:09:44.977+0530 7f84977fe6c0 10 client.4304 _put_inode on 0x10000000000.head(faked_ino=0 nref=4 ll_ref=0 cap_refs={4=0,1024=0,4096=0,8192=0} open={3=0} mode=100666 size=106287857664/110582824960 nlink=1 btime=2024-02-12T18:42:52.646736+0530 mtime=2024-02-12T19:09:43.796040+0530 ctime=2024-02-12T19:09:43.796040+0530 change_attr=100 caps=p(0=p) flushing_caps=Fw objectset[0x10000000000 ts 0/0 objects 332 dirty_or_tx 0] 0x7f84900088e0) n = 2
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 trim_cache size 0 max 16384
2024-02-12T19:09:44.977+0530 7f84977fe6c0 20 client.4304 upkeep thread waiting interval 1.000000000s
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 1
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 2
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 1 client.4304 _handle_full_flag: FULL: cancelling outstanding operations on 3
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 10 client.4304 unmounting: trim pass, size was 0+1
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 20 client.4304 trim_cache size 0 max 16384
2024-02-12T19:09:45.682+0530 7f84b1ffb6c0 10 client.4304 unmounting: trim pass, size still 0+1
2024-02-12T19:09:45.977+0530 7f84977fe6c0 20 client.4304 tick
2024-02-12T19:09:45.977+0530 7f84977fe6c0 20 client.4304 collect_and_send_metrics
2024-02-12T19:09:45.977+0530 7f84977fe6c0 20 client.4304 collect_and_send_global_metrics
2024-02-12T19:09:45.977+0530 7f84977fe6c0 20 client.4304 trim_cache size 0 max 16384
2024-02-12T19:09:45.977+0530 7f84977fe6c0 20 client.4304 upkeep thread waiting interval 1.000000000s
2024-02-12T19:09:46.977+0530 7f84977fe6c0 20 client.4304 tick
</code></pre>