Project

General

Profile

Bug #61594

Updated by Samuel Just 12 months ago

Original state: 

 <pre> 
 # ceph -s 
   cluster: 
     id:       0fc51c26-02dd-11ee-82fa-000af7995d6c 
     health: HEALTH_WARN 
             noscrub,nodeep-scrub flag(s) set 
             Degraded data redundancy: 229496390/1146156225 objects degraded (20.023%), 3660 pgs degraded, 3660 pgs undersized 

   services: 
     mon: 3 daemons, quorum f28-h21-000-r630,f28-h23-000-r630,f28-h22-000-r630 (age 28h) 
     mgr: f28-h22-000-r630.whjflb(active, since 28h), standbys: f28-h23-000-r630.zctfhz, f28-h21-000-r630.opssdk 
     osd: 192 osds: 192 up (since 14h), 192 in (since 15h); 4003 remapped pgs 
          flags noscrub,nodeep-scrub 
     rgw: 8 daemons active (8 hosts, 1 zones) 

   data: 
     pools:     7 pools, 4769 pgs 
     objects: 191.03M objects, 9.7 TiB 
     usage:     28 TiB used, 333 TiB / 361 TiB avail 
     pgs:       229496390/1146156225 objects degraded (20.023%) 
              49389321/1146156225 objects misplaced (4.309%) 
              3652 active+undersized+degraded+remapped+backfill_wait 
              765    active+clean 
              343    active+remapped+backfill_wait 
              7      active+recovery_wait+undersized+degraded+remapped 
              1      active+recovery_wait 
              1      active+recovering+undersized+degraded+remapped 
 </pre> 

 pg dump: 

 <pre> 
 # ceph pg dump | grep recovering 
 7.fd         47052                     0           2            0          0    2625982464              0             0    1486        1486       active+recovering+undersized+degraded+remapped    2023-06-05T03:27:30.378744+0000      2721'51980     2794:163727        [36,104,12,98,37,159]            36         [36,104,12,98,23,NONE]                36            0'0    2023-06-04T13:49:14.163307+0000                0'0    2023-06-04T13:49:14.163307+0000                0                      0    queued for scrub                                                             0                  0 
 </pre> 

 dump_op_pq_state on osd.36 (primary): 
 <pre> 
 # ceph --admin-daemon /var/run/ceph/0fc51c26-02dd-11ee-82fa-000af7995d6c/ceph-osd.36.asok dump_op_pq_state 
 { 
     "OSD:ShardedOpWQ:0": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:1": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:2": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:3": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 15, 
             "clients": "{ PriorityQueue::    { client:{ class_id: 3 client_id: 19145 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19210 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19235 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19290 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19340 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 28926 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37759 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37794 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37804 
profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37839 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37854 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37889 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37909 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 38583 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 38603 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none } } { reservation_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } } { ready_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none 
} } { limit_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } } }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ 
RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }LIMIT:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ 
class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }READY:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 
l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }" 
         } 
     }, 
     "OSD:ShardedOpWQ:4": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     } 
 } 
 </pre> 

 It is odd that all of the clients are being enumerated and that they are all clustered on a single shard (OSD:ShardedOpWQ:3 in the dump above), but this appears to be unrelated to this bug (see TODO). 

 The PG query output is attached, but it doesn't show anything interesting; there are no unfound objects.

Back