Bug #57185
EC 4+2 PG stuck in activating+degraded+remapped
0%
Description
- PG Query
{ "snap_trimq": "[]", "snap_trimq_len": 0, "state": "activating+degraded+remapped", "epoch": 5861, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "backfill_targets": [ "14(0)" ], "acting_recovery_backfill": [ "4(0)", "8(3)", "14(0)", "41(2)", "51(1)", "117(4)", "156(5)" ], "info": { "pgid": "9.c78s0", "last_update": "5210'48054", "last_complete": "4727'47329", "log_tail": "4722'45161", "last_user_version": 48054, "last_backfill": "MAX", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 159430, "reported_epoch": 5861, "state": "activating+degraded+remapped", "last_fresh": "2022-08-18T17:20:04.492434+0000", "last_change": "2022-08-18T16:34:47.692664+0000", "last_active": "2022-08-18T16:34:47.612904+0000", "last_peered": "2022-08-18T16:31:53.399844+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": "2022-08-18T17:20:04.492434+0000", "last_undegraded": "2022-08-18T16:34:47.625898+0000", "last_fullsized": "2022-08-18T17:20:04.492434+0000", "mapping_epoch": 5215, "log_start": "4722'45161", "ondisk_log_start": "4722'45161", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": 
"2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 2893, "ondisk_log_size": 2893, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for scrub", "scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, "num_objects_missing_on_primary": 683, "num_objects_missing": 683, "num_objects_degraded": 683, "num_objects_misplaced": 44885, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56122, "num_bytes_recovered": 2654949610, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "2(0)", "8(3)", "41(2)", "51(1)", "64(0)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": "2(0),8(3),41(2),51(1),64(0),117(4),156(5)", "objects": 72 }, { "shards": "2(0),8(3),41(2),51(1),117(4),156(5)", "objects": 32 }, { "shards": "4(0),8(3),41(2),51(1),117(4),156(5)", "objects": 44964 }, { "shards": 
"8(3),41(2),51(1),117(4),156(5)", "objects": 500 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5218, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "2(0)", "pgid": "9.c78s0", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "9:1e32a2e7:::59178bc5-0d1c-4138-85c4-42b6340da0d1.188855.3_TTcgroas%2f1580272.tl0uQj2FjJApUtlA.rnd:head", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "0'0", "reported_seq": 1, "reported_epoch": 5138, "state": "peering", "last_fresh": "2022-08-18T14:59:15.543131+0000", "last_change": "2022-08-18T14:59:15.543131+0000", "last_active": "0.000000", "last_peered": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_became_peered": "0.000000", "last_unstale": "2022-08-18T14:59:15.543131+0000", "last_undegraded": "2022-08-18T14:59:15.543131+0000", "last_fullsized": "2022-08-18T14:59:15.543131+0000", "mapping_epoch": 5215, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": 
"2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 0, "ondisk_log_size": 0, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 0, "scrub_schedule": "queued for scrub", "scrub_duration": 0, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2148793070, "num_objects": 7520, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 7520, "num_whiteouts": 0, "num_read": 4, "num_read_kb": 6, "num_write": 38, "num_write_kb": 21, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [], "object_location_counts": [], "blocked_by": [ 8, 41, 51, 117, 156 ], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 1, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "8(3)", "pgid": "9.c78s3", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "MAX", "purged_snaps": 
[], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 158780, "reported_epoch": 5215, "state": "active+undersized+degraded+remapped+backfilling", "last_fresh": "2022-08-18T16:31:53.399844+0000", "last_change": "2022-08-18T14:58:54.061043+0000", "last_active": "2022-08-18T16:31:53.399844+0000", "last_peered": "2022-08-18T16:31:53.399844+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": "2022-08-18T16:31:53.399844+0000", "last_undegraded": "2022-08-18T14:58:53.542830+0000", "last_fullsized": "2022-08-18T14:58:53.532113+0000", "mapping_epoch": 5215, "log_start": "4724'46503", "ondisk_log_start": "4724'46503", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 1551, "ondisk_log_size": 1551, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for scrub", 
"scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 38071, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56122, "num_bytes_recovered": 2654949610, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "51(1)", "8(3)", "41(2)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": "8(3),41(2),51(1),117(4),156(5)", "objects": 45568 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "14(0)", "pgid": "9.c78s0", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4722'45161", "last_user_version": 0, "last_backfill": "MIN", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, 
"same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "0'0", "reported_seq": 0, "reported_epoch": 0, "state": "unknown", "last_fresh": "0.000000", "last_change": "0.000000", "last_active": "0.000000", "last_peered": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_became_peered": "0.000000", "last_unstale": "0.000000", "last_undegraded": "0.000000", "last_fullsized": "0.000000", "mapping_epoch": 0, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 0, "last_epoch_clean": 0, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "0'0", "last_scrub_stamp": "0.000000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "0.000000", "last_clean_scrub_stamp": "0.000000", "objects_scrubbed": 0, "log_size": 0, "ondisk_log_size": 0, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 0, "scrub_schedule": "--", "scrub_duration": 0, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 0, "num_objects": 0, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_missing": 45568, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 0, "num_whiteouts": 0, "num_read": 0, "num_read_kb": 0, "num_write": 0, "num_write_kb": 0, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, 
"num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [], "acting": [], "avail_no_missing": [], "object_location_counts": [], "blocked_by": [], "up_primary": -1, "acting_primary": -1, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 1, "last_epoch_started": 5218, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "41(2)", "pgid": "9.c78s2", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "MAX", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 188248, "reported_epoch": 5210, "state": "active+undersized+degraded+remapped+backfilling", "last_fresh": "2022-08-18T16:31:53.399844+0000", "last_change": "2022-08-18T14:58:54.061043+0000", "last_active": "2022-08-18T16:31:53.399844+0000", "last_peered": "2022-08-18T16:31:53.399844+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": 
"2022-08-18T16:31:53.399844+0000", "last_undegraded": "2022-08-18T14:58:53.542830+0000", "last_fullsized": "2022-08-18T14:58:53.532113+0000", "mapping_epoch": 5215, "log_start": "4724'46503", "ondisk_log_start": "4724'46503", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 1551, "ondisk_log_size": 1551, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for scrub", "scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 38071, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56122, "num_bytes_recovered": 2654949610, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, 
"num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "51(1)", "8(3)", "41(2)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": "8(3),41(2),51(1),117(4),156(5)", "objects": 45568 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "51(1)", "pgid": "9.c78s1", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "MAX", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 188294, "reported_epoch": 5212, "state": "active+undersized+degraded+remapped+backfilling", "last_fresh": "2022-08-18T16:32:22.161926+0000", "last_change": "2022-08-18T14:58:54.061043+0000", "last_active": "2022-08-18T16:32:22.161926+0000", "last_peered": "2022-08-18T16:32:22.161926+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": "2022-08-18T16:32:22.161926+0000", "last_undegraded": "2022-08-18T14:58:53.542830+0000", "last_fullsized": "2022-08-18T14:58:53.532113+0000", "mapping_epoch": 5215, "log_start": "4724'46503", "ondisk_log_start": "4724'46503", 
"created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 1551, "ondisk_log_size": 1551, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for scrub", "scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 38048, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56144, "num_bytes_recovered": 2656143919, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "51(1)", "8(3)", "41(2)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": 
"8(3),41(2),51(1),117(4),156(5)", "objects": 45568 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "64(0)", "pgid": "9.c78s0", "last_update": "4973'48000", "last_complete": "4973'48000", "log_tail": "4724'46128", "last_user_version": 48000, "last_backfill": "9:1e3213f3:::59178bc5-0d1c-4138-85c4-42b6340da0d1.188831.2_LkZrS(rv%2f1416931.q)Ojo8f06c3YPl46.rnd:head", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "0'0", "reported_seq": 1, "reported_epoch": 4913, "state": "peering", "last_fresh": "2022-08-18T13:48:06.401417+0000", "last_change": "2022-08-18T13:48:06.401417+0000", "last_active": "0.000000", "last_peered": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_became_peered": "0.000000", "last_unstale": "2022-08-18T13:48:06.401417+0000", "last_undegraded": "2022-08-18T13:48:06.401417+0000", "last_fullsized": "2022-08-18T13:48:06.401417+0000", "mapping_epoch": 5215, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": 
"2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 0, "ondisk_log_size": 0, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 0, "scrub_schedule": "queued for scrub", "scrub_duration": 0, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 286374799, "num_objects": 5927, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 5927, "num_whiteouts": 0, "num_read": 0, "num_read_kb": 0, "num_write": 24, "num_write_kb": 207, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [], "object_location_counts": [], "blocked_by": [ 8, 41, 51, 117, 156 ], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 1, "last_epoch_started": 4915, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "117(4)", "pgid": "9.c78s4", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "MAX", 
"purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, "last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 188248, "reported_epoch": 5210, "state": "active+undersized+degraded+remapped+backfilling", "last_fresh": "2022-08-18T16:31:53.399844+0000", "last_change": "2022-08-18T14:58:54.061043+0000", "last_active": "2022-08-18T16:31:53.399844+0000", "last_peered": "2022-08-18T16:31:53.399844+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": "2022-08-18T16:31:53.399844+0000", "last_undegraded": "2022-08-18T14:58:53.542830+0000", "last_fullsized": "2022-08-18T14:58:53.532113+0000", "mapping_epoch": 5215, "log_start": "4724'46503", "ondisk_log_start": "4724'46503", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 1551, "ondisk_log_size": 1551, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for 
scrub", "scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 38071, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56122, "num_bytes_recovered": 2654949610, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "51(1)", "8(3)", "41(2)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": "8(3),41(2),51(1),117(4),156(5)", "objects": 45568 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "156(5)", "pgid": "9.c78s5", "last_update": "5210'48054", "last_complete": "5210'48054", "log_tail": "4724'46503", "last_user_version": 48054, "last_backfill": "MAX", "purged_snaps": [], "history": { "epoch_created": 1990, "epoch_pool_created": 1772, "last_epoch_started": 5140, "last_interval_started": 5139, "last_epoch_clean": 4685, "last_interval_clean": 4684, "last_epoch_split": 1990, 
"last_epoch_marked_full": 0, "same_up_since": 5215, "same_interval_since": 5215, "same_primary_since": 5213, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "prior_readable_until_ub": 0 }, "stats": { "version": "5210'48054", "reported_seq": 188248, "reported_epoch": 5210, "state": "active+undersized+degraded+remapped+backfilling", "last_fresh": "2022-08-18T16:31:53.399844+0000", "last_change": "2022-08-18T14:58:54.061043+0000", "last_active": "2022-08-18T16:31:53.399844+0000", "last_peered": "2022-08-18T16:31:53.399844+0000", "last_clean": "2022-08-18T13:34:41.913797+0000", "last_became_active": "2022-08-18T14:58:53.809588+0000", "last_became_peered": "2022-08-18T14:58:53.809588+0000", "last_unstale": "2022-08-18T16:31:53.399844+0000", "last_undegraded": "2022-08-18T14:58:53.542830+0000", "last_fullsized": "2022-08-18T14:58:53.532113+0000", "mapping_epoch": 5215, "log_start": "4724'46503", "ondisk_log_start": "4724'46503", "created": 1990, "last_epoch_clean": 4685, "parent": "0.0", "parent_split_bits": 12, "last_scrub": "0'0", "last_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "2022-08-16T17:02:35.249566+0000", "last_clean_scrub_stamp": "2022-08-16T17:21:58.068345+0000", "objects_scrubbed": 0, "log_size": 1551, "ondisk_log_size": 1551, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": false, "snaptrimq_len": 0, "last_scrub_duration": 1, "scrub_schedule": "queued for scrub", "scrub_duration": 0.0023479350000000002, "objects_trimmed": 0, "snaptrim_duration": 0, "stat_sum": { "num_bytes": 2147351278, "num_objects": 45568, "num_object_clones": 0, "num_object_copies": 273408, 
"num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 38071, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 45568, "num_whiteouts": 0, "num_read": 86096, "num_read_kb": 1035336, "num_write": 562975, "num_write_kb": 2182704, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 56122, "num_bytes_recovered": 2654949610, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "avail_no_missing": [ "51(1)", "8(3)", "41(2)", "117(4)", "156(5)" ], "object_location_counts": [ { "shards": "8(3),41(2),51(1),117(4),156(5)", "objects": 45568 } ], "blocked_by": [], "up_primary": 14, "acting_primary": 4, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 5140, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started/Primary/Active", "enter_time": "2022-08-18T16:34:47.612959+0000", "might_have_unfound": [ { "osd": "2(0)", "status": "already probed" }, { "osd": "8(3)", "status": "already probed" }, { "osd": "14(0)", "status": "already probed" }, { "osd": "41(2)", "status": "already probed" }, { "osd": "51(1)", "status": "already probed" }, { "osd": "64(0)", "status": "already probed" }, { "osd": "117(4)", "status": "already probed" }, { "osd": "156(5)", "status": "already probed" } ], "recovery_progress": { "backfill_targets": [ "14(0)" ], "waiting_on_backfill": [], "last_backfill_started": 
"MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "recovery_ops": [], "read_ops": [] } } }, { "name": "Started", "enter_time": "2022-08-18T16:34:43.465216+0000" } ], "agent_state": {} }
- Pool details
root@f28-h28-000-r630:~/tracker57185 # ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 203 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr
pool 2 '.rgw.root' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 1780 lfor 0/0/403 flags hashpspool stripe_width 0 application rgw
pool 8 'default.rgw.control' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 128 pgp_num 128 autoscale_mode on last_change 1786 lfor 0/0/1779 flags hashpspool stripe_width 0 pg_num_min 128 application rgw
pool 9 'default.rgw.buckets.data' erasure profile myprofile size 6 min_size 4 crush_rule 1 object_hash rjenkins pg_num 4096 pgp_num 4096 autoscale_mode on last_change 4684 lfor 0/0/2046 flags hashpspool stripe_width 16384 pg_num_min 4096 application rgw
pool 10 'default.rgw.buckets.index' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 256 pgp_num 256 autoscale_mode on last_change 1790 lfor 0/0/1788 flags hashpspool stripe_width 0 pg_num_min 256 application rgw
pool 11 'default.rgw.log' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 128 pgp_num 128 autoscale_mode on last_change 1786 lfor 0/0/1781 flags hashpspool stripe_width 0 pg_num_min 128 application rgw
pool 12 'default.rgw.meta' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 128 pgp_num 128 autoscale_mode on last_change 1805 lfor 0/0/1801 flags hashpspool stripe_width 0 pg_num_min 128 application rgw
Related issues
History
#1 Updated by Vikhyat Umrao about 1 year ago
- Assignee deleted (Sunny Kumar)
#2 Updated by Vikhyat Umrao about 1 year ago
- Affected Versions v17.2.1 added
#3 Updated by Vikhyat Umrao about 1 year ago
- Description updated (diff)
#4 Updated by Vikhyat Umrao about 1 year ago
From Cluster logs:
2022-08-18T17:28:27.391042+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50376 : cluster [DBG] pgmap v51959: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%) 2022-08-18T17:28:29.398626+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50377 : cluster [DBG] pgmap v51960: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%) 2022-08-18T17:28:31.426842+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50378 : cluster [DBG] pgmap v51961: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%) 2022-08-18T17:28:33.435305+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50379 : cluster [DBG] pgmap v51962: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%) 2022-08-18T17:28:35.463829+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50380 : cluster [DBG] pgmap v51963: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%) 2022-08-18T17:28:37.474617+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50381 : cluster [DBG] pgmap v51964: 4769 pgs: 1 activating+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 683/1116163251 objects degraded (0.000%); 44885/1116163251 objects misplaced (0.004%)
- Then marking OSD.14 down fixed it.
2022-08-18T17:28:39.221447+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43282 : audit [INF] from='client.? 172.16.45.51:0/1155453372' entity='client.admin' cmd=[{"prefix": "osd down", "ids": ["14"]}]: dispatch 2022-08-18T17:28:39.450766+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43284 : audit [INF] from='client.? 172.16.45.51:0/1155453372' entity='client.admin' cmd='[{"prefix": "osd down", "ids": ["14"]}]': finished
2022-08-18T17:28:41.490534+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50383 : cluster [DBG] pgmap v51968: 4769 pgs: 1 activating+undersized, 5 activating+undersized+degraded, 10 active+undersized+degraded+wait, 1 active+undersized+wait, 102 peering, 1 activating+degraded+remapped, 3 stale+active+clean, 4646 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 727339/1116163251 objects degraded (0.065%); 44885/1116163251 objects misplaced (0.004%); 0 B/s, 0 objects/s recovering 2022-08-18T17:28:42.365801+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43294 : cluster [WRN] Health check update: Reduced data availability: 65 pgs inactive, 102 pgs peering (PG_AVAILABILITY) 2022-08-18T17:28:42.365845+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43295 : cluster [WRN] Health check update: Degraded data redundancy: 727339/1116163251 objects degraded (0.065%), 16 pgs degraded, 9 pgs undersized (PG_DEGRADED) 2022-08-18T17:28:48.512232+0000 osd.4 (osd.4) 2903 : cluster [WRN] 3 slow requests (by type [ 'delayed' : 1 'started' : 2 ] most affected pool [ 'default.rgw.buckets.data' : 3 ]) 2022-08-18T17:28:49.559526+0000 osd.4 (osd.4) 2904 : cluster [WRN] 3 slow requests (by type [ 'started' : 3 ] most affected pool [ 'default.rgw.buckets.data' : 3 ]) 2022-08-18T17:28:43.501027+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50386 : cluster [DBG] pgmap v51969: 4769 pgs: 1 activating+undersized, 5 activating+undersized+degraded, 22 active+undersized+degraded+wait, 1 active+undersized+wait, 102 peering, 1 activating+degraded+remapped, 2 stale+active+clean, 4635 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 1227468/1116163251 objects degraded (0.110%); 44885/1116163251 objects misplaced (0.004%); 0 B/s, 0 objects/s recovering 2022-08-18T17:28:44.387744+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43300 : cluster [INF] osd.14 marked itself dead as of e5863 2022-08-18T17:28:50.538081+0000 osd.4 (osd.4) 2905 : 
cluster [WRN] 3 slow requests (by type [ 'started' : 3 ] most affected pool [ 'default.rgw.buckets.data' : 3 ]) 2022-08-18T17:28:50.885231+0000 osd.14 (osd.14) 49 : cluster [WRN] Monitor daemon marked osd.14 down, but it is still running 2022-08-18T17:28:50.885244+0000 osd.14 (osd.14) 50 : cluster [DBG] map e5863 wrongly marked me down at e5862 2022-08-18T17:28:44.867398+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43301 : cluster [INF] Health check cleared: OSD_DOWN (was: 1 osds down) 2022-08-18T17:28:44.876276+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43302 : cluster [INF] osd.14 [v2:172.16.45.91:6808/896701948,v1:172.16.45.91:6809/896701948] boot 2022-08-18T17:28:44.876351+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43303 : cluster [DBG] osdmap e5864: 192 total, 192 up, 192 in 2022-08-18T17:28:51.574143+0000 osd.4 (osd.4) 2906 : cluster [WRN] 1 slow requests (by type [ 'started' : 1 ] most affected pool [ 'default.rgw.buckets.data' : 1 ]) 2022-08-18T17:28:45.517586+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50387 : cluster [DBG] pgmap v51971: 4769 pgs: 1 active+recovery_wait+degraded+remapped, 1 activating+undersized, 5 activating+undersized+degraded, 92 active+undersized+degraded+wait, 5 active+undersized+wait, 46 peering, 4619 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 9.9 KiB/s rd, 86 KiB/s wr, 35 op/s; 4088862/1116163323 objects degraded (0.366%); 44897/1116163323 objects misplaced (0.004%); 0 B/s, 0 objects/s recovering 2022-08-18T17:28:45.727100+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43305 : cluster [WRN] Health check update: 238 slow ops, oldest one blocked for 3072 sec, osd.4 has slow ops (SLOW_OPS) 2022-08-18T17:28:45.904925+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43306 : cluster [DBG] osdmap e5865: 192 total, 192 up, 192 in 2022-08-18T17:28:52.539945+0000 osd.4 (osd.4) 2907 : cluster [WRN] 1 slow requests (by type [ 'started' : 1 ] most 
affected pool [ 'default.rgw.buckets.data' : 1 ]) 2022-08-18T17:28:46.920926+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43307 : cluster [DBG] osdmap e5866: 192 total, 192 up, 192 in 2022-08-18T17:28:47.522808+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50388 : cluster [DBG] pgmap v51974: 4769 pgs: 1 active+recovery_wait+degraded+remapped, 79 active+undersized+degraded+wait, 4 active+undersized+wait, 54 peering, 4631 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 13 KiB/s rd, 87 KiB/s wr, 39 op/s; 3271084/1116163323 objects degraded (0.293%); 44897/1116163323 objects misplaced (0.004%) 2022-08-18T17:28:47.931797+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43308 : cluster [WRN] Health check update: Reduced data availability: 51 pgs inactive, 54 pgs peering (PG_AVAILABILITY) 2022-08-18T17:28:47.931818+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43309 : cluster [WRN] Health check update: Degraded data redundancy: 3271084/1116163323 objects degraded (0.293%), 80 pgs degraded, 59 pgs undersized (PG_DEGRADED) 2022-08-18T17:28:47.945693+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43310 : cluster [DBG] osdmap e5867: 192 total, 192 up, 192 in 2022-08-18T17:28:54.452876+0000 osd.4 (osd.4) 2908 : cluster [DBG] 9.c78s0 starting backfill to osd.14(0) from (0'0,0'0] MAX to 5863'48174 2022-08-18T17:28:49.534609+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50389 : cluster [DBG] pgmap v51976: 4769 pgs: 1 activating+undersized+degraded+remapped, 43 active+undersized+degraded+wait, 2 active+undersized+wait, 44 peering, 4679 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 14 KiB/s rd, 4.3 KiB/s wr, 16 op/s; 1772631/1116163323 objects degraded (0.159%); 44897/1116163323 objects misplaced (0.004%) 2022-08-18T17:28:49.936427+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43311 : cluster [INF] Health check cleared: SLOW_OPS (was: 3 slow ops, oldest one blocked for 396 sec, osd.4 
has slow ops) 2022-08-18T17:28:51.561392+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50390 : cluster [DBG] pgmap v51977: 4769 pgs: 1 activating+undersized+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 3.6 KiB/s rd, 1.3 KiB/s wr, 4 op/s; 683/1116163323 objects degraded (0.000%); 44897/1116163323 objects misplaced (0.004%) 2022-08-18T17:28:52.239800+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43313 : cluster [INF] Health check cleared: PG_AVAILABILITY (was: Reduced data availability: 42 pgs inactive, 44 pgs peering) 2022-08-18T17:28:53.569002+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50391 : cluster [DBG] pgmap v51978: 4769 pgs: 1 activating+undersized+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 2.4 KiB/s rd, 803 B/s wr, 2 op/s; 683/1116163323 objects degraded (0.000%); 44897/1116163323 objects misplaced (0.004%) 2022-08-18T17:28:55.599643+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50392 : cluster [DBG] pgmap v51979: 4769 pgs: 1 active+recovering+undersized+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 52 KiB/s rd, 63 KiB/s wr, 78 op/s; 500/1116163323 objects degraded (0.000%); 104/1116163323 objects misplaced (0.000%) 2022-08-18T17:28:55.730821+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43315 : cluster [WRN] Health check update: Degraded data redundancy: 683/1116163323 objects degraded (0.000%), 1 pg degraded (PG_DEGRADED) 2022-08-18T17:28:57.607845+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50393 : cluster [DBG] pgmap v51980: 4769 pgs: 1 active+recovering+undersized+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 52 KiB/s rd, 56 KiB/s wr, 77 op/s; 500/1116163323 objects degraded (0.000%); 104/1116163323 objects misplaced (0.000%) 2022-08-18T17:28:59.621410+0000 mgr.f28-h30-000-r630.yjjsgj (mgr.24231) 50394 : cluster [DBG] pgmap v51981: 4769 pgs: 1 
active+recovering+undersized+degraded+remapped, 4768 active+clean; 7.9 TiB data, 20 TiB used, 335 TiB / 355 TiB avail; 50 KiB/s rd, 49 KiB/s wr, 75 op/s; 483/1116163323 objects degraded (0.000%); 99/1116163323 objects misplaced (0.000%); 51 KiB/s, 1 objects/s recovering 2022-08-18T17:29:00.732584+0000 mon.f28-h28-000-r630.rdu2.scalelab.redhat.com (mon.0) 43317 : cluster [WRN] Health check update: Degraded data redundancy: 483/1116163323 objects degraded (0.000%), 1 pg degraded (PG_DEGRADED)
- From PG 9.c78 query:
"state": "activating+degraded+remapped", "epoch": 5861, "up": [ 14, 51, 41, 8, 117, 156 ], "acting": [ 4, 51, 41, 8, 117, 156 ], "backfill_targets": [ "14(0)" ], "acting_recovery_backfill": [ "4(0)", "8(3)", "14(0)", "41(2)", "51(1)", "117(4)", "156(5)" ],
#5 Updated by Vikhyat Umrao about 1 year ago
We captured debug logs (debug_osd = 20 and debug_ms = 1), and they are available here - f28-h28-000-r630.rdu2.scalelab.redhat.com:/root/tracker57185. In any case, even at first glance it was clear why this PG was getting stuck in activating:
2022-08-18T16:34:47.949+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c78s0: 750 >= 750
We also saw the same message for multiple other PGs, but they were probably fine, as they may have been activated well before we could catch them.
# cat ceph-osd.14.log | grep withhold | grep -v 9.c78 2022-08-18T16:34:47.869+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.2b2s4: 750 >= 750 2022-08-18T16:34:47.870+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.6b3s5: 750 >= 750 2022-08-18T16:34:47.872+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.4f1s2: 750 >= 750 2022-08-18T16:34:47.873+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.316s5: 750 >= 750 2022-08-18T16:34:47.873+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.d1es1: 750 >= 750 2022-08-18T16:34:47.873+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c3s5: 750 >= 750 2022-08-18T16:34:47.874+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.7cds1: 750 >= 750 2022-08-18T16:34:47.875+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.d78s4: 750 >= 750 2022-08-18T16:34:47.875+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.62cs1: 750 >= 750 2022-08-18T16:34:47.875+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.45es2: 750 >= 750 2022-08-18T16:34:47.876+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.7c0s5: 750 >= 750 2022-08-18T16:34:47.876+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.6f2s5: 750 >= 750 2022-08-18T16:34:47.876+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.1ccs0: 750 >= 750 2022-08-18T16:34:47.876+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.818s0: 750 >= 750 2022-08-18T16:34:47.877+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.337s3: 750 >= 750 2022-08-18T16:34:47.877+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.fa2s4: 750 
>= 750 2022-08-18T16:34:47.877+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.5ds3: 750 >= 750 2022-08-18T16:34:47.878+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.4f6s1: 750 >= 750 2022-08-18T16:34:47.878+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.14es1: 750 >= 750 2022-08-18T16:34:47.878+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.edas4: 750 >= 750 2022-08-18T16:34:47.878+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.9d6s4: 750 >= 750 2022-08-18T16:34:47.879+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.220s0: 750 >= 750 2022-08-18T16:34:47.879+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.5c5s4: 750 >= 750 2022-08-18T16:34:47.879+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.950s0: 750 >= 750 2022-08-18T16:34:47.880+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c39s3: 750 >= 750 2022-08-18T16:34:47.881+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.e7ds1: 750 >= 750 2022-08-18T16:34:47.881+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.2d2s0: 750 >= 750 2022-08-18T16:34:47.882+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.f9s3: 750 >= 750 2022-08-18T16:34:47.884+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.991s5: 750 >= 750 2022-08-18T16:34:47.884+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.69cs1: 750 >= 750 2022-08-18T16:34:47.886+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.ecbs1: 750 >= 750 2022-08-18T16:34:47.887+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.603s3: 750 >= 750 2022-08-18T16:34:47.888+0000 7fa664723700 
1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.6e6s2: 750 >= 750 2022-08-18T16:34:47.888+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.388s4: 750 >= 750 2022-08-18T16:34:47.889+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.761s4: 750 >= 750 2022-08-18T16:34:47.890+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.dd0s4: 750 >= 750 2022-08-18T16:34:47.890+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c57s2: 750 >= 750 2022-08-18T16:34:47.890+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.e39s0: 750 >= 750 2022-08-18T16:34:47.892+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.cd8s0: 750 >= 750 2022-08-18T16:34:47.893+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.a36s0: 750 >= 750 2022-08-18T16:34:47.893+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.242s2: 750 >= 750 2022-08-18T16:34:47.893+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c80s3: 750 >= 750 2022-08-18T16:34:47.894+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.22s3: 750 >= 750 2022-08-18T16:34:47.894+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.97cs1: 750 >= 750 2022-08-18T16:34:47.894+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.d0cs5: 750 >= 750 2022-08-18T16:34:47.896+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.a31s5: 750 >= 750 2022-08-18T16:34:47.897+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.405s5: 750 >= 750 2022-08-18T16:34:47.897+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.616s0: 750 >= 750 2022-08-18T16:34:47.898+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold 
creation of pg 9.b2as2: 750 >= 750 2022-08-18T16:34:47.898+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.ceds5: 750 >= 750 2022-08-18T16:34:47.899+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.f72s4: 750 >= 750 2022-08-18T16:34:47.900+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c92s0: 750 >= 750 2022-08-18T16:34:47.901+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.274s5: 750 >= 750 2022-08-18T16:34:47.901+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.f27s3: 750 >= 750 2022-08-18T16:34:47.904+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.74s0: 750 >= 750 2022-08-18T16:34:47.904+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.c4fs0: 750 >= 750 2022-08-18T16:34:47.906+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.9c0s2: 750 >= 750 2022-08-18T16:34:47.906+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 11.63: 750 >= 750 2022-08-18T16:34:47.906+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.916s3: 750 >= 750 2022-08-18T16:34:47.907+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.d8s0: 750 >= 750 2022-08-18T16:34:47.909+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.3b7s0: 750 >= 750 2022-08-18T16:34:47.910+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.38s0: 750 >= 750 2022-08-18T16:34:47.912+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.a6s5: 750 >= 750 2022-08-18T16:34:47.913+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.6aas4: 750 >= 750 2022-08-18T16:34:47.913+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.8b1s3: 750 >= 750 
2022-08-18T16:34:47.914+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.8f8s2: 750 >= 750 2022-08-18T16:34:47.915+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.213s3: 750 >= 750 2022-08-18T16:34:47.917+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.ae2s5: 750 >= 750 2022-08-18T16:34:47.920+0000 7fa662f20700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 10.95: 750 >= 750 2022-08-18T16:34:47.921+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.e11s4: 750 >= 750 2022-08-18T16:34:47.921+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.74fs2: 750 >= 750 2022-08-18T16:34:47.922+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.367s3: 750 >= 750 2022-08-18T16:34:47.923+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.286s1: 750 >= 750 2022-08-18T16:34:47.924+0000 7fa664f24700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 10.fa: 750 >= 750 2022-08-18T16:34:47.924+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.52es3: 750 >= 750 2022-08-18T16:34:47.925+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.ac4s5: 750 >= 750 2022-08-18T16:34:47.929+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.cces2: 750 >= 750 2022-08-18T16:34:47.930+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 10.d4: 750 >= 750 2022-08-18T16:34:47.932+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 11.24: 750 >= 750 2022-08-18T16:34:47.932+0000 7fa664723700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.169s3: 750 >= 750 2022-08-18T16:34:47.932+0000 7fa663721700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.6b1s1: 750 >= 750 2022-08-18T16:34:47.934+0000 7fa663f22700 1 osd.14 5218 
maybe_wait_for_max_pg withhold creation of pg 10.89: 750 >= 750 2022-08-18T16:34:47.942+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.7a5s2: 750 >= 750 2022-08-18T16:34:47.976+0000 7fa663f22700 1 osd.14 5218 maybe_wait_for_max_pg withhold creation of pg 9.ef3s2: 750 >= 750
#6 Updated by Vikhyat Umrao about 1 year ago
- Here we were testing OSD failure and recovery/backfill for the mClock scheduler. For this test we bring down, in phases, one OSD node and then two OSD nodes, and then bring them back. When the OSDs get marked out, PGs are mapped and remapped, and it looks like that is where we go above the 750-PG threshold.
#7 Updated by Vikhyat Umrao about 1 year ago
Tim - two workarounds. As you have seen with your script, the balancer module sometimes keeps changing the PG stats, which does not make for a valid test during OSD failure, and there is also this PG threshold.
Please use the following commands to fix it:
1. Switch off the balancer
ceph balancer off
2. Increase the PG threshold: raise mon_max_pg_per_osd from its default of 250 to 350. With osd_max_pg_per_osd_hard_ratio at 3, this takes the hard limit from 250*3 => 750 to 350*3 => 1050
ceph config set global mon_max_pg_per_osd 350
- Description
- name: mon_max_pg_per_osd type: uint level: advanced desc: Max number of PGs per OSD the cluster will allow long_desc: If the number of PGs per OSD exceeds this, a health warning will be visible in `ceph status`. This is also used in automated PG management, as the threshold at which some pools' pg_num may be shrunk in order to enable increasing the pg_num of others. default: 250 flags: - runtime services: - mgr - mon min: 1
#8 Updated by Vikhyat Umrao about 1 year ago
This should have been easily caught if we had this implemented:
https://tracker.ceph.com/issues/23117
https://github.com/ceph/ceph/pull/44962
#9 Updated by Vikhyat Umrao about 1 year ago
- Duplicated by Bug #23117: PGs stuck in "activating" after osd_max_pg_per_osd_hard_ratio has been exceeded once added
#10 Updated by Vikhyat Umrao about 1 year ago
- Status changed from New to Duplicate