Project

General

Profile

Bug #23350

Updated by Patrick Donnelly about 6 years ago

For: http://pulpito.ceph.com/pdonnell-2018-03-11_22:42:18-multimds-wip-pdonnell-testing-20180311.180352-testing-basic-smithi/2280268

<pre>
sudo ceph daemon mds.e dump_blocked_ops
{
"ops": [
{
"description": "client_request(client.4513:2703 unlink #0x10000000154/datafile92 2018-03-13 16:23:11.898232 caller_uid=1000, caller_gid=1184{6,27,119,1184,10102,})",
"initiated_at": "2018-03-13 16:23:11.898388",
"age": 19612.193106,
"duration": 19612.193128,
"type_data": {
"flag_point": "failed to xlock, waiting",
"reqid": "client.4513:2703",
"op_type": "client_request",
"client_info": {
"client": "client.4513",
"tid": 2703
},
"events": [
{
"time": "2018-03-13 16:23:11.898388",
"event": "initiated"
},
{
"time": "2018-03-13 16:23:11.898388",
"event": "header_read"
},
{
"time": "2018-03-13 16:23:11.898390",
"event": "throttled"
},
{
"time": "2018-03-13 16:23:11.898407",
"event": "all_read"
},
{
"time": "2018-03-13 16:23:11.898444",
"event": "dispatched"
},
{
"time": "2018-03-13 16:23:11.899243",
"event": "failed to xlock, waiting"
},
{
"time": "2018-03-13 16:23:11.899913",
"event": "failed to xlock, waiting"
}
]
}
},
{
"description": "internal op exportdir:mds.8:1",
"initiated_at": "2018-03-13 16:23:17.877220",
"age": 19606.214273,
"duration": 19606.214330,
"type_data": {
"flag_point": "failed to wrlock, waiting",
"reqid": "mds.8:1",
"op_type": "internal_op",
"internal_op": 5377,
"op_name": "exportdir",
"events": [
{
"time": "2018-03-13 16:23:17.877220",
"event": "initiated"
},
{
"time": "2018-03-13 16:23:17.877220",
"event": "header_read"
},
{
"time": "0.000000",
"event": "throttled"
},
{
"time": "0.000000",
"event": "all_read"
},
{
"time": "0.000000",
"event": "dispatched"
},
{
"time": "2018-03-13 16:23:17.932443",
"event": "requesting remote authpins"
},
{
"time": "2018-03-13 16:23:17.935152",
"event": "failed to wrlock, waiting"
}
]
}
}
],
"complaint_time": 30.000000,
"num_blocked_ops": 2
}
</pre>

<pre>
$ ceph status
cluster:
id: 7fdb719f-6f45-4e39-8d31-3e4589969b5f
health: HEALTH_WARN
1 clients failing to respond to capability release
3 MDSs report slow requests

services:
mon: 3 daemons, quorum b,a,c
mgr: x(active), standbys: y
mds: cephfs-9/9/9 up {0=i=up:active,1=h=up:active,2=c=up:active,3=d=up:active,4=a=up:active,5=b=up:active,6=g=up:active,7=f=up:active,8=e=up:active}
osd: 8 osds: 8 up, 8 in

data:
pools: 3 pools, 24 pgs
objects: 2.39K objects, 8.60G
usage: 19.0G used, 696G / 715G avail
pgs: 24 active+clean
</pre>

<pre>
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator export_empty_import [dir 0x10000000154.000* /client.0/tmp/tmp/data/ [2,head] auth{1=1,2=1} v=2552 cv=2104/2104 dir_auth=8 state=1610612738|complete f(v0 m2018-03-13 16:23:11.308620)/f(v0 m2018-03-13 16:15:20.801855 12=12+0) n(v30 rc2018-03-13 16:23:11.308620)/n(v30 rc2018-03-13 16:15:20.801855 b1258291200 12=12+0) hs=0+12,ss=0+0 dirty=12 | child=1 frozen=0 subtree=1 importing=0 replicated=1 dirty=1 authpin=0 0x561f0bef4a00]
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator really empty, exporting to 2
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator exporting to mds.2 empty import [dir 0x10000000154.000* /client.0/tmp/tmp/data/ [2,head] auth{1=1,2=1} v=2552 cv=2104/2104 dir_auth=8 state=1610612738|complete f(v0 m2018-03-13 16:23:11.308620)/f(v0 m2018-03-13 16:15:20.801855 12=12+0) n(v30 rc2018-03-13 16:23:11.308620)/n(v30 rc2018-03-13 16:15:20.801855 b1258291200 12=12+0) hs=0+12,ss=0+0 dirty=12 | child=1 frozen=0 subtree=1 importing=0 replicated=1 dirty=1 authpin=0 0x561f0bef4a00]
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator export_dir [dir 0x10000000154.000* /client.0/tmp/tmp/data/ [2,head] auth{1=1,2=1} v=2552 cv=2104/2104 dir_auth=8 state=1610612738|complete f(v0 m2018-03-13 16:23:11.308620)/f(v0 m2018-03-13 16:15:20.801855 12=12+0) n(v30 rc2018-03-13 16:23:11.308620)/n(v30 rc2018-03-13 16:15:20.801855 b1258291200 12=12+0) hs=0+12,ss=0+0 dirty=12 | child=1 frozen=0 subtree=1 importing=0 replicated=1 dirty=1 authpin=0 0x561f0bef4a00] to 2
2018-03-13 16:23:17.874 7f665b0a7700 15 mds.8.7 hit export target (new) 10 @ 2018-03-13 16:23:17.877199
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.cache.dir(0x10000000154.000*) auth_pin by 0x561f0bb3b200 on [dir 0x10000000154.000* /client.0/tmp/tmp/data/ [2,head] auth{1=1,2=1} v=2552 cv=2104/2104 dir_auth=8 ap=1+0+0 state=1610612738|complete f(v0 m2018-03-13 16:23:11.308620)/f(v0 m2018-03-13 16:15:20.801855 12=12+0) n(v30 rc2018-03-13 16:23:11.308620)/n(v30 rc2018-03-13 16:15:20.801855 b1258291200 12=12+0) hs=0+12,ss=0+0 dirty=12 | child=1 frozen=0 subtree=1 importing=0 replicated=1 dirty=1 authpin=1 0x561f0bef4a00] count now 1 + 0
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.cache request_start_internal request(mds.8:1) op 5377
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator dispatch_export_dir request(mds.8:1)
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.migrator dest is not yet an export target
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.cache trim_client_leases
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.cache trim_client_leases pool 1 trimmed 0 leases, 14 left
2018-03-13 16:23:17.874 7f66608b2700 1 -- 172.21.15.22:6817/3347805822 <== osd.1 172.21.15.156:6812/12550 71 ==== osd_op_reply(111 508.00000003 [delete] v20'43 uv41 ondisk = -2 ((2) No such file or directory)) v9 ==== 157+0+0 (2998213817 0 0) 0x561f0c28fa00 con 0x561f0bbabc00
2018-03-13 16:23:17.874 7f665b0a7700 2 mds.8.cache check_memory_usage total 406184, rss 35892, heap 305772, baseline 305772, buffers 0, 6 / 34 inodes have caps, 6 caps, 0.176471 caps per inode
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.log trim 1 / 30 segments, 317 / -1 events, 0 (0) expiring, 0 (0) expired
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.log _trim_expired_segments waiting for 1/4194304 to expire
2018-03-13 16:23:17.874 7f665b0a7700 15 mds.8.bal get_load mdsload<[19.3849,5.47398 30.3329]/[19.3849,5.47398 30.3329], req 184, hr 0, qlen 0, cpu 3.47>
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.server find_idle_sessions. laggy until 0.000000
2018-03-13 16:23:17.874 7f665b0a7700 20 mds.8.server laggiest active session is client.4513 172.21.15.76:0/818533459
2018-03-13 16:23:17.874 7f665b0a7700 20 mds.8.server laggiest active session is client.4513 172.21.15.76:0/818533459 and sufficiently new (2018-03-13 16:23:07.041623)
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.locker scatter_tick
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.locker scatter_nudge replica, requesting scatter/unscatter of (ifile mix w=1 dirty) on [inode 0x10000000154 [...2,head] /client.0/tmp/tmp/data/ rep@2.1 fragtree_t(*^3) v3424 f(v0 m2018-03-13 16:17:59.160280 128=128+0) n(v30 rc2018-03-13 16:17:59.160280 b13421772800 129=128+1)/n(v3 rc2018-03-13 16:17:59.160280 b13421772800 129=128+1) (isnap sync r=1) (inest mix->lock w=1 dirty) (ifile mix w=1 dirty) (iversion lock) caps={4513=pAsLsXs/Fx@50} | dirtyscattered=2 request=0 lock=3 importing=0 dirfrag=2 caps=1 waiter=0 0x561f0bef3c00]
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 _send_message--> mds.2 172.21.15.156:6817/1524570799 -- lock(a=nudge ifile 0x10000000154.head) v1 -- ?+0 0x561f0c134c00
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 --> 172.21.15.156:6817/1524570799 -- lock(a=nudge ifile 0x10000000154.head) v1 -- 0x561f0c134c00 con 0
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.locker scatter_nudge auth, scatter/unscattering (inest sync dirty) on [inode 0x108 [...2,head] ~mds8/ auth{1=1,2=1} v1 snaprealm=0x561f0ba3cf00 f(v0 10=0+10) n(v0 11=0+11) (inest sync dirty) (iversion lock) | dirtyscattered=1 dirfrag=1 replicated=1 0x561f0bacd800]
2018-03-13 16:23:17.874 7f665b0a7700 7 mds.8.locker scatter_mix (inest sync dirty) on [inode 0x108 [...2,head] ~mds8/ auth{1=1,2=1} v1 snaprealm=0x561f0ba3cf00 f(v0 10=0+10) n(v0 11=0+11) (inest sync dirty) (iversion lock) | dirtyscattered=1 dirfrag=1 replicated=1 0x561f0bacd800]
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 _send_message--> mds.1 172.21.15.22:6819/2349255223 -- lock(a=mix inest 0x108.head) v1 -- ?+0 0x561f0c134ec0
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 --> 172.21.15.22:6819/2349255223 -- lock(a=mix inest 0x108.head) v1 -- 0x561f0c134ec0 con 0
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 _send_message--> mds.2 172.21.15.156:6817/1524570799 -- lock(a=mix inest 0x108.head) v1 -- ?+0 0x561f0c135180
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 --> 172.21.15.156:6817/1524570799 -- lock(a=mix inest 0x108.head) v1 -- 0x561f0c135180 con 0
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.cache.ino(0x108) auth_pin by 0x561f0bacde50 on [inode 0x108 [...2,head] ~mds8/ auth{1=1,2=1} v1 ap=1+0 snaprealm=0x561f0ba3cf00 f(v0 10=0+10) n(v0 11=0+11) (inest sync->mix g=1,2 dirty) (iversion lock) | dirtyscattered=1 dirfrag=1 replicated=1 authpin=1 0x561f0bacd800] now 1+0
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.locker scatter_nudge auth, waiting for stable (inest sync->mix g=1,2 dirty) on [inode 0x108 [...2,head] ~mds8/ auth{1=1,2=1} v1 ap=1+0 snaprealm=0x561f0ba3cf00 f(v0 10=0+10) n(v0 11=0+11) (inest sync->mix g=1,2 dirty) (iversion lock) | dirtyscattered=1 dirfrag=1 replicated=1 authpin=1 0x561f0bacd800]
2018-03-13 16:23:17.874 7f665b0a7700 10 mds.8.locker scatter_nudge replica, requesting scatter/unscatter of (inest mix->lock w=1 dirty) on [inode 0x10000000154 [...2,head] /client.0/tmp/tmp/data/ rep@2.1 fragtree_t(*^3) v3424 f(v0 m2018-03-13 16:17:59.160280 128=128+0) n(v30 rc2018-03-13 16:17:59.160280 b13421772800 129=128+1)/n(v3 rc2018-03-13 16:17:59.160280 b13421772800 129=128+1) (isnap sync r=1) (inest mix->lock w=1 dirty) (ifile mix w=1 dirty) (iversion lock) caps={4513=pAsLsXs/Fx@50} | dirtyscattered=2 request=0 lock=3 importing=0 dirfrag=2 caps=1 waiter=0 0x561f0bef3c00]
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 _send_message--> mds.2 172.21.15.156:6817/1524570799 -- lock(a=nudge inest 0x10000000154.head) v1 -- ?+0 0x561f0c005c80
2018-03-13 16:23:17.874 7f665b0a7700 1 -- 172.21.15.22:6817/3347805822 --> 172.21.15.156:6817/1524570799 -- lock(a=nudge inest 0x10000000154.head) v1 -- 0x561f0c005c80 con 0
2018-03-13 16:23:17.874 7f66600b1700 1 -- 172.21.15.22:6817/3347805822 <== osd.5 172.21.15.22:6813/12665 11 ==== osd_op_reply(115 508.00000000 [writefull 0~90] v20'104 uv104 ondisk = 0) v9 ==== 157+0+0 (1317151691 0 0) 0x561f0c314d80 con 0x561f0bbac300
</pre>

Log preserved: From: /ceph/tmp/pdonnell/2280268/ceph-mds.e.log

Back