telegeo02:~ # echo module ceph +p > /sys/kernel/debug/dynamic_debug/control telegeo02:~ # echo module cephfs +p > /sys/kernel/debug/dynamic_debug/control tom@telegeo02:~> cp -v /somewhere//S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF /mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF '/somewhere//S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' cp: error writing '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF': Input/output error telegeo02:~ # cat trace_ceph_io_error | grep -vE '(unlocking|bytes cleaned|put_wrbuffer_cap_refs|writepages_finish|ceph_caps_issued|get_session|con_get|put_session|con_put|lookup_mds_session|check_delayed_caps|delayed_work|check_delayed_caps|send_renew_caps)' | head -n1000 -- Logs begin at jeu 2018-04-26 14:30:03 CEST, end at lun 2018-06-04 15:09:02 CEST. 
-- jun 04 15:07:49 telegeo02 kernel: ceph: check_caps ffff881ffa237c68 file_want - used Fc dirty Fw flushing - issued pAsxLsXsxFsxcrwb revoking - retain pAsLsXsFsc AUTHONLY FLUSH jun 04 15:07:49 telegeo02 kernel: ceph: mds0 cap ffff8801f6e78370 used Fc issued pAsxLsXsxFsxcrwb implemented pAsxLsXsxFsxcrwb revoking - jun 04 15:07:49 telegeo02 kernel: ceph: flushing dirty caps jun 04 15:07:49 telegeo02 kernel: ceph: __mark_caps_flushing flushing Fw, flushing_caps - -> Fw jun 04 15:07:49 telegeo02 kernel: ceph: inode ffff881ffa237c68 now !dirty jun 04 15:07:49 telegeo02 kernel: ceph: __send_cap ffff881ffa237c68 cap ffff8801f6e78370 session ffff88196587a800 pAsxLsXsxFsxcrwb -> pAsLsXsFsc (revoking -) jun 04 15:07:49 telegeo02 kernel: ceph: delaying issued pAsxLsXsxFsxcrwb -> pAsLsXsFsc, wanted pAsxXsxFxwb -> - on send jun 04 15:07:49 telegeo02 kernel: ceph: send_cap_msg update 29a5a4 100000524ab caps pAsxLsXsxFsxcrwb wanted pAsxXsxFxwb dirty Fw seq 6/4 tid 552904/552904 mseq 0 follows 1 size 384827392/0 xattr_ver 16646714064888378588 xattr_len 0 jun 04 15:07:49 telegeo02 kernel: ceph: check_caps ffff881ffa237c68 file_want - used Fc dirty - flushing Fw issued pAsxLsXsxFsxcrwb revoking - retain pAsLsXsFsc AUTHONLY FLUSH jun 04 15:07:49 telegeo02 kernel: ceph: __cap_set_timeouts ffff881ffa237c68 min 1272 max 15022 jun 04 15:07:49 telegeo02 kernel: ceph: __cap_delay_requeue ffff881ffa237c68 flags 2160 at 4553195720 jun 04 15:07:49 telegeo02 kernel: ceph: handle_caps from mds0 jun 04 15:07:49 telegeo02 kernel: ceph: op flush_ack ino 100000524ab.fffffffffffffffe inode ffff881ffa237c68 jun 04 15:07:49 telegeo02 kernel: ceph: mds0 seq 1180007 cap seq 6 jun 04 15:07:49 telegeo02 kernel: ceph: handle_cap_flush_ack inode ffff881ffa237c68 mds0 seq 6 on Fw cleaned Fw, flushing Fw -> - jun 04 15:07:49 telegeo02 kernel: ceph: inode ffff881ffa237c68 now !flushing jun 04 15:07:49 telegeo02 kernel: ceph: inode ffff881ffa237c68 now clean jun 04 15:08:02 telegeo02 kernel: ceph: 
handle_session mds0 renewcaps ffff88196587a800 state open seq 30262 jun 04 15:08:02 telegeo02 kernel: ceph: renewed_caps mds0 ttl now 4553198952, was fresh, now stale jun 04 15:08:02 telegeo02 kernel: ceph: handle_session mds1 renewcaps ffff881f329bb000 state open seq 29051 jun 04 15:08:02 telegeo02 kernel: ceph: renewed_caps mds1 ttl now 4553198952, was fresh, now stale jun 04 15:08:22 telegeo02 kernel: ceph: handle_session mds0 renewcaps ffff88196587a800 state open seq 30263 jun 04 15:08:22 telegeo02 kernel: ceph: renewed_caps mds0 ttl now 4553203952, was fresh, now stale jun 04 15:08:22 telegeo02 kernel: ceph: handle_session mds1 renewcaps ffff881f329bb000 state open seq 29052 jun 04 15:08:22 telegeo02 kernel: ceph: renewed_caps mds1 ttl now 4553203952, was fresh, now stale jun 04 15:08:42 telegeo02 kernel: ceph: handle_session mds0 renewcaps ffff88196587a800 state open seq 30264 jun 04 15:08:42 telegeo02 kernel: ceph: renewed_caps mds0 ttl now 4553208952, was fresh, now stale jun 04 15:08:42 telegeo02 kernel: ceph: handle_session mds1 renewcaps ffff881f329bb000 state open seq 29053 jun 04 15:08:42 telegeo02 kernel: ceph: renewed_caps mds1 ttl now 4553208952, was fresh, now stale jun 04 15:08:52 telegeo02 kernel: ceph: check_caps ffff881ffa237c68 file_want - used Fc dirty - flushing - issued pAsxLsXsxFsxcrwb revoking - retain pAsLsXsFsc NODELAY jun 04 15:08:52 telegeo02 kernel: ceph: mds0 cap ffff8801f6e78370 used Fc issued pAsxLsXsxFsxcrwb implemented pAsxLsXsxFsxcrwb revoking - jun 04 15:08:52 telegeo02 kernel: ceph: __send_cap ffff881ffa237c68 cap ffff8801f6e78370 session ffff88196587a800 pAsxLsXsxFsxcrwb -> pAsLsXsFsc (revoking -) jun 04 15:08:52 telegeo02 kernel: ceph: send_cap_msg update 29a5a4 100000524ab caps pAsLsXsFsc wanted - dirty - seq 6/4 tid 0/0 mseq 0 follows 0 size 384827392/0 xattr_ver 18446612253130914840 xattr_len 0 jun 04 15:08:52 telegeo02 kernel: ceph: check_caps ffff881ffa237c68 file_want - used Fc dirty - flushing - issued pAsLsXsFsc 
revoking - retain pAsLsXsFsc NODELAY jun 04 15:09:02 telegeo02 kernel: ceph: handle_session mds0 renewcaps ffff88196587a800 state open seq 30265 jun 04 15:09:02 telegeo02 kernel: ceph: renewed_caps mds0 ttl now 4553213952, was fresh, now stale jun 04 15:09:02 telegeo02 kernel: ceph: handle_session mds1 renewcaps ffff881f329bb000 state open seq 29054 jun 04 15:09:02 telegeo02 kernel: ceph: renewed_caps mds1 ttl now 4553213952, was fresh, now stale tom@telegeo02:~> cp -v /somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF /mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF '/somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' cp: error writing '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF': Input/output error tom@telegeo02:~> cp -v /somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF /mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF 
'/somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' cp: error writing '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF': Input/output error tom@telegeo02:~> cp -v /somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF /mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF '/somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' cp: error writing '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF': Input/output error tom@telegeo02:~> cp -v /somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF 
/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF '/somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' cp: error writing '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF': Input/output error tom@telegeo02:~> cp -v /somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF /mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF '/somewhere/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' -> '/mnt/cephfs/pool_21p3/S2A_MSIL2A_20170223T165311_N0204_R026_T14QNE_20170223T170837.SAFE/S2A_OPER_SSC_L2VALD_14QNE____20170223.DBL.DIR/S2A_OPER_SSC_PDTIMG_L2VALD_14QNE____20170223_FRE_R1.DBL.TIF' tom@telegeo02:~> Finally, it worked! ...