Project

General

Profile

Bug #9750

Updated by Loïc Dachary over 9 years ago

<pre> 
 # ceph --version 
 ceph version 0.80.6 (f93610a4421cb670b08e974c6550ee715ac528ae) 
 # ceph -s 
     cluster 1fe74663-8dfa-486c-bb80-3bd94c90c967 
      health HEALTH_WARN 33 pgs incomplete; 33 pgs stuck inactive; 33 pgs stuck unclean; 230 requests are blocked > 32 sec; nodeep-scrub flag(s) set 
      monmap e8: 3 mons at {g1=192.168.99.251:6789/0,g2=192.168.99.252:6789/0,g3=192.168.99.253:6789/0}, election epoch 732, quorum 0,1,2 g1,g2,g3 
      osdmap e11525: 12 osds: 11 up, 10 in 
             flags nodeep-scrub 
       pgmap v2245931: 480 pgs, 5 pools, 435 GB data, 111 kobjects 
             890 GB used, 17477 GB / 18375 GB avail 
                  447 active+clean 
                   33 incomplete 
   client io 11634 B/s wr, 0 op/s 
 # ceph osd dump  
 epoch 11525 
 fsid 1fe74663-8dfa-486c-bb80-3bd94c90c967 
 created 2014-08-27 14:10:20.841398 
 modified 2014-10-12 15:04:23.238437 
 flags nodeep-scrub 
 pool 57 'ssds' replicated size 2 min_size 2 crush_ruleset 1 object_hash rjenkins pg_num 128 pgp_num 128 last_change 11338 flags hashpspool stripe_width 0 
 pool 58 'disks' replicated size 2 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 128 pgp_num 128 last_change 11344 lfor 1717 flags hashpspool tiers 64 read_tier 64 write_tier 64 stripe_width 0 
 pool 59 'images' replicated size 2 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 32 pgp_num 32 last_change 11335 flags hashpspool stripe_width 0 
	 removed_snaps [1~4,7~20,29~5] 
 pool 64 'ssd-cache' replicated size 2 min_size 2 crush_ruleset 1 object_hash rjenkins pg_num 128 pgp_num 128 last_change 11344 flags hashpspool,incomplete_clones tier_of 58 cache_mode forward target_bytes 20000000000 target_objects 20000 hit_set bloom{false_positive_probability: 0.05, target_size: 0, seed: 0} 60s x1 stripe_width 0 
 pool 70 'sileht-pool' replicated size 1 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 11297 owner 100 flags hashpspool max_bytes 107374182400 stripe_width 0 
 max_osd 12 
 osd.0 up     in    weight 1 up_from 11243 up_thru 11295 down_at 11239 last_clean_interval [8886,11238) 192.168.99.251:6800/24624 192.168.99.251:6801/24624 192.168.99.251:6803/24624 192.168.99.251:6808/24624 exists,up 86a6f791-ffc0-49fb-bf94-e2159c70ab21 
 osd.1 up     in    weight 1 up_from 10686 up_thru 11295 down_at 10683 last_clean_interval [8881,10685) 192.168.99.252:6800/2505 192.168.99.252:6809/5002505 192.168.99.252:6810/5002505 192.168.99.252:6811/5002505 exists,up 9d598c77-dbe8-486f-85cd-8b2e5b7a0040 
 osd.2 up     in    weight 1 up_from 10958 up_thru 11295 down_at 10854 last_clean_interval [10820,10853) 192.168.99.253:6810/11312 192.168.99.253:6811/11312 192.168.99.253:6812/11312 192.168.99.253:6813/11312 exists,up a0e57816-9e96-4576-a2d3-01a0fdf81532 
 osd.3 down out weight 0 up_from 10685 up_thru 11055 down_at 11247 last_clean_interval [10485,10684) 192.168.99.251:6802/24042 192.168.99.251:6804/2024042 192.168.99.251:6805/2024042 192.168.99.251:6806/2024042 autoout,exists adf6f901-1a9a-4573-875b-e9730a27be8e 
 osd.4 up     in    weight 1 up_from 11517 up_thru 11524 down_at 11513 last_clean_interval [11504,11513) 192.168.99.252:6803/26617 192.168.99.252:6805/26617 192.168.99.252:6807/26617 192.168.99.252:6815/26617 exists,up d384579f-c639-4508-bcc2-0b13d15d10fe 
 osd.5 up     in    weight 1 up_from 11515 up_thru 11520 down_at 11510 last_clean_interval [11287,11510) 192.168.99.253:6800/2438 192.168.99.253:6801/2438 192.168.99.253:6804/2438 192.168.99.253:6815/2438 exists,up 3eca9b28-07ce-4e58-a44f-181a5690af97 
 osd.6 up     in    weight 1 up_from 11524 up_thru 11524 down_at 11522 last_clean_interval [11518,11522) 192.168.99.253:6802/23688 192.168.99.253:6803/23688 192.168.99.253:6805/23688 192.168.99.253:6807/23688 exists,up 2817a124-1cf1-4e55-bf03-7114f105094b 
 osd.7 up     in    weight 1 up_from 11520 up_thru 11524 down_at 11514 last_clean_interval [11279,11514) 192.168.99.252:6814/27045 192.168.99.252:6817/27045 192.168.99.252:6818/27045 192.168.99.252:6819/27045 exists,up 8f370604-7ab8-448f-9b07-b4833b79029b 
 osd.8 up     in    weight 1 up_from 11275 up_thru 11295 down_at 11273 last_clean_interval [10644,11272) 192.168.99.247:6800/2122 192.168.99.247:6804/2122 192.168.99.247:6806/2122 192.168.99.247:6807/2122 exists,up 7d520969-4013-4e6d-b0e6-6e61811c59ba 
 osd.9 up     in    weight 1 up_from 10688 up_thru 11295 down_at 10683 last_clean_interval [10656,10687) 192.168.99.247:6805/23390 192.168.99.247:6815/1023390 192.168.99.247:6816/1023390 192.168.99.247:6817/1023390 exists,up 74aea5c4-6b80-4c04-ba9f-fc46559a7731 
 osd.10 up     in    weight 1 up_from 10691 up_thru 11295 down_at 10690 last_clean_interval [10667,10690) 192.168.99.247:6810/23828 192.168.99.247:6801/1023828 192.168.99.247:6802/1023828 192.168.99.247:6803/1023828 exists,up 64dcd784-7b67-4cb0-8e09-a534bd4e16a9 
 osd.11 up     out weight 0 up_from 11472 up_thru 11431 down_at 11438 last_clean_interval [10990,11432) 192.168.99.247:6811/3800 192.168.99.247:6812/3800 192.168.99.247:6813/3800 192.168.99.247:6818/3800 exists,up f815a676-328d-495d-92a2-08e90600476c 

 </pre> 
 The incomplete PGs are listed below: 
 <pre> 
 # ceph pg dump | grep incomp 
 dumped all in format plain 
 57.45 	 0 	 0 	 0 	 0 	 0 	 3024 	 3024 	 incomplete 	 2014-10-12 13:03:40.176013 	 11383'2928285 	 11525:89 	 [5,7] 	 5 	 [5,7] 	 5 	 11160'2928073 	 2014-10-11 13:09:30.225188 	 5472'2376086 	 2014-10-06 09:39:26.255379 
 57.4d 	 107 	 0 	 0 	 0 	 425882624 	 3016 	 3016 	 incomplete 	 2014-10-12 15:04:22.288079 	 11382'1290720 	 11525:1513967 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'1285805 	 2014-10-10 07:13:28.134023 	 5472'1072841 	 2014-10-06 09:39:53.120255 
 57.49 	 0 	 0 	 0 	 0 	 0 	 3021 	 3021 	 incomplete 	 2014-10-12 13:03:37.098606 	 11383'2325270 	 11525:92 	 [5,4] 	 5 	 [5,4] 	 5 	 11246'2323387 	 2014-10-11 13:09:33.217405 	 5472'2049390 	 2014-10-06 09:39:42.880334 
 57.57 	 102 	 0 	 0 	 0 	 407614976 	 3001 	 3001 	 incomplete 	 2014-10-12 15:04:22.288993 	 11246'3091550 	 11525:4803827 	 [7,6] 	 7 	 [7,6] 	 7 	 11160'3091535 	 2014-10-11 13:09:38.220735 	 5472'2533219 	 2014-10-06 09:40:10.493130 
 57.54 	 80 	 0 	 0 	 0 	 317466112 	 3000 	 3000 	 incomplete 	 2014-10-12 15:04:22.288929 	 11246'4317937 	 11525:131 	 [7,6] 	 7 	 [7,6] 	 7 	 11088'4317863 	 2014-10-11 13:04:56.179800 	 4912'3453941 	 2014-10-05 22:07:17.259052 
 57.52 	 122 	 0 	 0 	 0 	 474496000 	 3002 	 3002 	 incomplete 	 2014-10-12 15:04:22.287801 	 11383'2283797 	 11525:3387862 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'2276867 	 2014-10-10 07:11:44.747698 	 4920'1853187 	 2014-10-05 22:36:49.071083 
 57.5f 	 71 	 0 	 0 	 0 	 281724928 	 3018 	 3018 	 incomplete 	 2014-10-12 15:04:22.288625 	 11380'2377443 	 11525:134 	 [7,6] 	 7 	 [7,6] 	 7 	 11246'2377374 	 2014-10-11 13:09:50.208927 	 5472'2010172 	 2014-10-06 09:40:27.276794 
 57.5b 	 103 	 0 	 0 	 0 	 418585088 	 3129 	 3129 	 incomplete 	 2014-10-12 15:04:22.287022 	 11383'1372861 	 11525:1896591 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'1365833 	 2014-10-10 07:11:56.754171 	 4920'1004469 	 2014-10-05 23:04:09.049999 
 57.58 	 90 	 0 	 0 	 0 	 367459328 	 3022 	 3022 	 incomplete 	 2014-10-12 15:04:22.293004 	 11383'3258127 	 11525:4494642 	 [6,7] 	 6 	 [6,7] 	 6 	 11242'3257441 	 2014-10-11 11:57:35.264780 	 5472'2315412 	 2014-10-06 09:40:13.848164 
 57.6c 	 105 	 0 	 0 	 0 	 407285760 	 3006 	 3006 	 incomplete 	 2014-10-12 15:04:22.294497 	 11380'2104028 	 11525:3100729 	 [6,7] 	 6 	 [6,7] 	 6 	 11196'2103377 	 2014-10-11 13:05:04.192729 	 4920'1660415 	 2014-10-05 23:04:12.690151 
 57.6a 	 101 	 0 	 0 	 0 	 383593472 	 3006 	 3006 	 incomplete 	 2014-10-12 15:04:22.284057 	 11383'3621006 	 11525:4133666 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'3608082 	 2014-10-10 07:26:22.590929 	 5472'2959153 	 2014-10-06 09:42:47.872933 
 57.6b 	 104 	 0 	 0 	 0 	 402800640 	 3006 	 3006 	 incomplete 	 2014-10-12 15:04:22.284007 	 11321'2436976 	 11525:2788229 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'2429526 	 2014-10-10 07:26:23.656336 	 5472'2019113 	 2014-10-06 09:42:52.973654 
 57.73 	 104 	 0 	 0 	 0 	 407564800 	 3085 	 3085 	 incomplete 	 2014-10-12 15:04:22.295758 	 11383'3945568 	 11525:5355074 	 [6,7] 	 6 	 [6,7] 	 6 	 11242'3945130 	 2014-10-11 11:57:29.267904 	 4920'3157497 	 2014-10-05 23:10:41.936011 
 57.70 	 97 	 0 	 0 	 0 	 387794432 	 3008 	 3008 	 incomplete 	 2014-10-12 15:04:22.283750 	 11383'3019416 	 11525:3437715 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'3011173 	 2014-10-10 07:26:25.609721 	 5472'2538910 	 2014-10-06 09:42:54.577364 
 57.7d 	 0 	 0 	 0 	 0 	 0 	 3000 	 3000 	 incomplete 	 2014-10-12 15:04:22.278409 	 11246'2874459 	 11525:128 	 [4,6] 	 4 	 [4,6] 	 4 	 11196'2874351 	 2014-10-11 13:07:17.195815 	 4912'2380009 	 2014-10-05 22:08:35.148770 
 57.78 	 92 	 0 	 0 	 0 	 352608256 	 3093 	 3093 	 incomplete 	 2014-10-12 15:04:22.292041 	 11355'3148441 	 11525:4904857 	 [6,7] 	 6 	 [6,7] 	 6 	 11190'3146700 	 2014-10-11 13:09:53.219823 	 5472'2637276 	 2014-10-06 09:41:33.843191 
 57.a 	 106 	 0 	 0 	 0 	 415744000 	 3008 	 3008 	 incomplete 	 2014-10-12 15:04:22.288294 	 11383'2843305 	 11525:4014217 	 [6,7] 	 6 	 [6,7] 	 6 	 11196'2841877 	 2014-10-11 11:56:36.257106 	 4912'2236546 	 2014-10-05 22:09:47.574945 
 57.b 	 104 	 0 	 0 	 0 	 402772480 	 3009 	 3009 	 incomplete 	 2014-10-12 15:04:22.276535 	 11327'4868708 	 11525:5548142 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'4852177 	 2014-10-10 07:12:58.470211 	 5472'3989692 	 2014-10-06 09:38:23.216916 
 57.11 	 89 	 0 	 0 	 0 	 345584128 	 3034 	 3034 	 incomplete 	 2014-10-12 15:04:22.264816 	 11383'3080713 	 11525:3510446 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'3070868 	 2014-10-10 07:13:02.450229 	 4617'2288876 	 2014-10-04 11:59:06.860953 
 57.18 	 88 	 0 	 0 	 0 	 347055104 	 3036 	 3036 	 incomplete 	 2014-10-12 15:04:22.264731 	 11380'2366715 	 11525:2793714 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'2358565 	 2014-10-10 07:13:05.293096 	 4676'1879708 	 2014-10-05 06:53:28.577361 
 57.23 	 81 	 0 	 0 	 0 	 323639296 	 3008 	 3008 	 incomplete 	 2014-10-12 15:04:22.265808 	 11341'3106227 	 11525:4222290 	 [7,6] 	 7 	 [7,6] 	 7 	 9562'3092943 	 2014-10-10 07:13:10.427265 	 4676'2347345 	 2014-10-05 09:49:43.873196 
 57.20 	 0 	 0 	 0 	 0 	 0 	 3000 	 3000 	 incomplete 	 2014-10-12 15:04:22.268349 	 11246'4625545 	 11525:66 	 [4,6] 	 4 	 [4,6] 	 4 	 11176'4625539 	 2014-10-11 13:07:59.196536 	 5472'3895324 	 2014-10-06 09:38:47.821879 
 57.2d 	 93 	 0 	 0 	 0 	 370021888 	 3013 	 3013 	 incomplete 	 2014-10-12 15:04:22.268234 	 11380'3140309 	 11525:3598658 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'3129985 	 2014-10-10 07:13:11.065580 	 4617'2439996 	 2014-10-04 16:43:09.684486 
 57.2b 	 93 	 0 	 0 	 0 	 357371904 	 3007 	 3007 	 incomplete 	 2014-10-12 15:04:22.291278 	 11330'1972547 	 11525:2741589 	 [6,4] 	 6 	 [6,4] 	 6 	 11190'1972453 	 2014-10-11 13:04:54.181847 	 4912'1604374 	 2014-10-05 22:10:01.370795 
 57.29 	 107 	 0 	 0 	 0 	 412491776 	 3049 	 3049 	 incomplete 	 2014-10-12 15:04:22.268111 	 11361'3844969 	 11525:4520192 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'3832985 	 2014-10-10 07:13:08.097025 	 4617'2753765 	 2014-10-04 16:20:57.141537 
 57.36 	 0 	 0 	 0 	 0 	 0 	 3009 	 3009 	 incomplete 	 2014-10-12 15:04:22.264620 	 11361'1583414 	 11525:134 	 [7,6] 	 7 	 [7,6] 	 7 	 11194'1583347 	 2014-10-11 13:07:10.186546 	 4617'1207617 	 2014-10-04 16:30:15.292302 
 57.37 	 0 	 0 	 0 	 0 	 0 	 3025 	 3025 	 incomplete 	 2014-10-12 15:04:22.269302 	 11355'3189349 	 11525:127 	 [4,6] 	 4 	 [4,6] 	 4 	 11190'3189080 	 2014-10-11 13:09:27.206422 	 5472'2725528 	 2014-10-06 09:39:21.158931 
 57.35 	 113 	 0 	 0 	 0 	 437386752 	 3094 	 3094 	 incomplete 	 2014-10-12 15:04:22.285744 	 11343'3139296 	 11525:4311602 	 [6,4] 	 6 	 [6,4] 	 6 	 11176'3135768 	 2014-10-11 11:57:03.266674 	 4912'2457747 	 2014-10-05 22:06:42.371162 
 57.33 	 83 	 0 	 0 	 0 	 327238656 	 3010 	 3010 	 incomplete 	 2014-10-12 15:04:22.298327 	 11380'2644031 	 11525:3624374 	 [6,4] 	 6 	 [6,4] 	 6 	 11086'2644013 	 2014-10-11 13:09:23.212591 	 5472'2227196 	 2014-10-06 09:39:10.753359 
 57.30 	 0 	 0 	 0 	 0 	 0 	 3001 	 3001 	 incomplete 	 2014-10-12 15:04:22.291716 	 11311'5192424 	 11525:133 	 [7,6] 	 7 	 [7,6] 	 7 	 11160'5192402 	 2014-10-11 13:04:55.174389 	 4912'4285756 	 2014-10-05 22:06:38.663218 
 57.3f 	 0 	 0 	 0 	 0 	 0 	 3015 	 3015 	 incomplete 	 2014-10-12 15:04:22.294056 	 11330'2254046 	 11525:128 	 [4,6] 	 4 	 [4,6] 	 4 	 11196'2253714 	 2014-10-11 13:07:13.189303 	 5472'1900357 	 2014-10-06 09:39:39.505152 
 57.3b 	 87 	 0 	 0 	 0 	 335044608 	 3048 	 3048 	 incomplete 	 2014-10-12 15:04:22.293621 	 11383'3473261 	 11525:5086524 	 [4,6] 	 4 	 [4,6] 	 4 	 9562'3466127 	 2014-10-10 07:11:05.591555 	 4920'3000743 	 2014-10-05 22:23:58.285068 
 57.38 	 88 	 0 	 0 	 0 	 336330752 	 3005 	 3005 	 incomplete 	 2014-10-12 15:04:22.293527 	 11380'2471507 	 11525:3803126 	 [4,6] 	 4 	 [4,6] 	 4 	 11242'2471465 	 2014-10-11 13:09:29.202288 	 5472'2089056 	 2014-10-06 09:39:23.926758 
 </pre> 
 The output of <b>ceph pg 57.7d query</b> is attached. The CRUSH rules are: 
 <pre> 
 # ceph osd crush rule dump 
 [ 
     { "rule_id": 0, 
       "rule_name": "replicated_ruleset", 
       "ruleset": 0, 
       "type": 1, 
       "min_size": 1, 
       "max_size": 10, 
       "steps": [ 
             { "op": "take", 
               "item": -1, 
               "item_name": "default"}, 
             { "op": "chooseleaf_firstn", 
               "num": 0, 
               "type": "host"}, 
             { "op": "emit"}]}, 
     { "rule_id": 1, 
       "rule_name": "ssd_replicated_ruleset", 
       "ruleset": 1, 
       "type": 1, 
       "min_size": 1, 
       "max_size": 10, 
       "steps": [ 
             { "op": "take", 
               "item": -5, 
               "item_name": "ssd"}, 
             { "op": "chooseleaf_firstn", 
               "num": 0, 
               "type": "host"}, 
             { "op": "emit"}]}] 
 </pre> 

Back