Project

General

Profile

Bug #61594

Updated by Samuel Just 12 months ago

Original state: 

 <pre> 
 # ceph -s 
   cluster: 
     id:       0fc51c26-02dd-11ee-82fa-000af7995d6c 
     health: HEALTH_WARN 
             noscrub,nodeep-scrub flag(s) set 
             Degraded data redundancy: 229496390/1146156225 objects degraded (20.023%), 3660 pgs degraded, 3660 pgs undersized 

   services: 
     mon: 3 daemons, quorum f28-h21-000-r630,f28-h23-000-r630,f28-h22-000-r630 (age 28h) 
     mgr: f28-h22-000-r630.whjflb(active, since 28h), standbys: f28-h23-000-r630.zctfhz, f28-h21-000-r630.opssdk 
     osd: 192 osds: 192 up (since 14h), 192 in (since 15h); 4003 remapped pgs 
          flags noscrub,nodeep-scrub 
     rgw: 8 daemons active (8 hosts, 1 zones) 

   data: 
     pools:     7 pools, 4769 pgs 
     objects: 191.03M objects, 9.7 TiB 
     usage:     28 TiB used, 333 TiB / 361 TiB avail 
     pgs:       229496390/1146156225 objects degraded (20.023%) 
              49389321/1146156225 objects misplaced (4.309%) 
              3652 active+undersized+degraded+remapped+backfill_wait 
              765    active+clean 
              343    active+remapped+backfill_wait 
              7      active+recovery_wait+undersized+degraded+remapped 
              1      active+recovery_wait 
              1      active+recovering+undersized+degraded+remapped 
 </pre> 

 pg dump: 

 <pre> 
 # ceph pg dump | grep recovering 
 7.fd         47052                     0           2            0          0    2625982464              0             0    1486        1486       active+recovering+undersized+degraded+remapped    2023-06-05T03:27:30.378744+0000      2721'51980     2794:163727        [36,104,12,98,37,159]            36         [36,104,12,98,23,NONE]                36            0'0    2023-06-04T13:49:14.163307+0000                0'0    2023-06-04T13:49:14.163307+0000                0                      0    queued for scrub                                                             0                  0 
 </pre> 

 dump_op_pq_state on osd.36 (primary) 
 <pre> 
 # ceph --admin-daemon /var/run/ceph/0fc51c26-02dd-11ee-82fa-000af7995d6c/ceph-osd.36.asok dump_op_pq_state 
 { 
     "OSD:ShardedOpWQ:0": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:1": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:2": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     }, 
     "OSD:ShardedOpWQ:3": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 15, 
             "clients": "{ PriorityQueue::    { client:{ class_id: 3 client_id: 19145 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19210 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19235 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19290 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 19340 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 28926 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37759 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37794 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37804 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37839 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37854 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37889 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 37909 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 38583 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none } }    { client:{ class_id: 3 client_id: 38603 profile_id: 0 }, record:{ ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none } } { reservation_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } } { ready_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } } { limit_top:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none } } }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }LIMIT:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }READY:{ ClientRec:: client:{ class_id: 3 client_id: 37889 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3778 p:221586.1626 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19235 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7961 p:601269.3723 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19290 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989233.3795 p:476374.6644 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37854 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.7976 p:703405.5547 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37804 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8466 p:238305.3975 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 28926 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3236 p:590459.0414 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38603 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8447 p:599985.7752 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19145 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8446 p:732788.9572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 38583 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8426 p:854006.4917 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37909 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7992 p:718490.5572 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19210 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8379 p:985550.2676 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37759 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989224.3253 p:923097.1278 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37839 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989223.8365 p:340187.5934 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 19340 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:988849.7979 p:633339.5302 l:0.0000 } req_count:0 top_req:none }, { ClientRec:: client:{ class_id: 3 client_id: 37794 profile_id: 0 } prev_tag:{ RequestTag:: ready:false r:989217.8433 p:605618.9912 l:0.0000 } req_count:0 top_req:none }" 
         } 
     }, 
     "OSD:ShardedOpWQ:4": { 
         "queue_sizes": { 
             "immediate": 0, 
             "scheduler": 0 
         }, 
         "mClockClients": { 
             "client_count": 0, 
             "clients": "{ PriorityQueue:: HEAPS-EMPTY }" 
         }, 
         "mClockQueues": { 
             "queues": "RESER:LIMIT:READY:" 
         } 
     } 
 } 
 </pre> 

 Odd that we are enumerating all clients and that they are clustered on a single shard, but unrelated to this bug (see TODO). 

 PG query is attached, but doesn't show anything interesting, no unfound objects. 

 ceph osd down 36 causes the pg to repeer and end up in the same state. 

 Enabled debug_osd = 20 on 36, marked it down, captured log (attached gzipped log with 7.fds0 grepped out).

Back