Actions
Bug #52255
open The pgs are degraded, but all the osds are up and there is no recovery or backfilling
Status:
Need More Info
Priority:
Normal
Assignee:
-
Category:
-
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
rbd
Component(RADOS):
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
I removed a server yesterday, but 6 pgs are stuck in the degraded state and no longer change.
The copy size of pool is 3.
The health detail output shows:
HEALTH_WARN Degraded data redundancy: 7812/4856175 objects degraded (0.161%), 6 pgs degraded, 6 pgs undersized
PG_DEGRADED Degraded data redundancy: 7812/4856175 objects degraded (0.161%), 6 pgs degraded, 6 pgs undersized
pg 3.1f is stuck undersized for 5227.725424, current state active+undersized+degraded, last acting [33,8]
pg 3.3b is stuck undersized for 44976.167641, current state active+undersized+degraded, last acting [36,21]
pg 3.3c is stuck undersized for 44976.174640, current state active+undersized+degraded, last acting [36,7]
pg 3.4a is stuck undersized for 44976.171052, current state active+undersized+degraded, last acting [10,34]
pg 3.66 is stuck undersized for 44976.168330, current state active+undersized+degraded, last acting [19,16]
pg 3.7f is stuck undersized for 66349.043653, current state active+undersized+degraded, last acting [21,18]
The command `ceph pg dump_stuck` shows:
PG_STAT STATE UP UP_PRIMARY ACTING ACTING_PRIMARY
3.4a active+undersized+degraded [10,34] 10 [10,34] 10
3.3c active+undersized+degraded [36,7] 36 [36,7] 36
3.3b active+undersized+degraded [36,21] 36 [36,21] 36
3.1f active+undersized+degraded [33,8] 33 [33,8] 33
3.66 active+undersized+degraded [19,16] 19 [19,16] 19
3.7f active+undersized+degraded [21,18] 21 [21,18] 21
The info for one of the pgs looks like this:
# ceph pg 3.4a query { "state": "active+undersized+degraded", "snap_trimq": "[]", "snap_trimq_len": 0, "epoch": 3087, "up": [ 10, 34 ], "acting": [ 10, 34 ], "actingbackfill": [ "10", "34" ], "info": { "pgid": "3.4a", "last_update": "3087'17416148", "last_complete": "3087'17416148", "log_tail": "2070'17406078", "last_user_version": 17416148, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [ { "start": "1", "length": "d" }, { "start": "13", "length": "2" } ], "history": { "epoch_created": 144, "epoch_pool_created": 144, "last_epoch_started": 2075, "last_interval_started": 2074, "last_epoch_clean": 1378, "last_interval_clean": 1359, "last_epoch_split": 206, "last_epoch_marked_full": 0, "same_up_since": 2074, "same_interval_since": 2074, "same_primary_since": 582, "last_scrub": "1346'17384156", "last_scrub_stamp": "2021-08-12 01:00:46.854454", "last_deep_scrub": "1346'17363981", "last_deep_scrub_stamp": "2021-08-11 00:21:39.115330", "last_clean_scrub_stamp": "2021-08-12 01:00:46.854454" }, "stats": { "version": "3087'17416148", "reported_seq": "27965921", "reported_epoch": "3087", "state": "active+undersized+degraded", "last_fresh": "2021-08-13 11:10:19.963929", "last_change": "2021-08-12 22:37:42.523713", "last_active": "2021-08-13 11:10:19.963929", "last_peered": "2021-08-13 11:10:19.963929", "last_clean": "2021-08-12 22:37:39.959234", "last_became_active": "2021-08-12 22:37:42.523713", "last_became_peered": "2021-08-12 22:37:42.523713", "last_unstale": "2021-08-13 11:10:19.963929", "last_undegraded": "2021-08-12 22:37:42.511001", "last_fullsized": "2021-08-12 22:37:42.509855", "mapping_epoch": 2074, "log_start": "2070'17406078", "ondisk_log_start": "2070'17406078", "created": 144, "last_epoch_clean": 1378, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1346'17384156", "last_scrub_stamp": "2021-08-12 01:00:46.854454", "last_deep_scrub": "1346'17363981", "last_deep_scrub_stamp": "2021-08-11 00:21:39.115330", "last_clean_scrub_stamp": 
"2021-08-12 01:00:46.854454", "log_size": 10070, "ondisk_log_size": 10070, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "snaptrimq_len": 0, "stat_sum": { "num_bytes": 5377932020, "num_objects": 1288, "num_object_clones": 1, "num_object_copies": 3864, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 1288, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 1288, "num_whiteouts": 0, "num_read": 28230422, "num_read_kb": 111166288, "num_write": 17408569, "num_write_kb": 578323545, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 1279, "num_bytes_recovered": 5330412276, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0 }, "up": [ 10, 34 ], "acting": [ 10, 34 ], "blocked_by": [], "up_primary": 10, "acting_primary": 10 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 2075, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "34", "pgid": "3.4a", "last_update": "3087'17416148", "last_complete": "3087'17416148", "log_tail": "2070'17404578", "last_user_version": 17406149, "last_backfill": "MAX", "last_backfill_bitwise": 1, "purged_snaps": [ { "start": "1", "length": "d" }, { "start": "13", "length": "2" } ], "history": { "epoch_created": 144, "epoch_pool_created": 144, "last_epoch_started": 2075, "last_interval_started": 2074, "last_epoch_clean": 1378, "last_interval_clean": 1359, "last_epoch_split": 206, "last_epoch_marked_full": 0, 
"same_up_since": 2074, "same_interval_since": 2074, "same_primary_since": 582, "last_scrub": "1346'17384156", "last_scrub_stamp": "2021-08-12 01:00:46.854454", "last_deep_scrub": "1346'17363981", "last_deep_scrub_stamp": "2021-08-11 00:21:39.115330", "last_clean_scrub_stamp": "2021-08-12 01:00:46.854454" }, "stats": { "version": "2073'17406148", "reported_seq": "27948804", "reported_epoch": "2073", "state": "active+clean", "last_fresh": "2021-08-12 22:37:39.944287", "last_change": "2021-08-12 16:47:34.937642", "last_active": "2021-08-12 22:37:39.944287", "last_peered": "2021-08-12 22:37:39.944287", "last_clean": "2021-08-12 22:37:39.944287", "last_became_active": "2021-08-12 16:43:30.452372", "last_became_peered": "2021-08-12 16:43:30.452372", "last_unstale": "2021-08-12 22:37:39.944287", "last_undegraded": "2021-08-12 22:37:39.944287", "last_fullsized": "2021-08-12 22:37:39.944287", "mapping_epoch": 2074, "log_start": "2070'17404578", "ondisk_log_start": "2070'17404578", "created": 144, "last_epoch_clean": 1378, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1346'17384156", "last_scrub_stamp": "2021-08-12 01:00:46.854454", "last_deep_scrub": "1346'17363981", "last_deep_scrub_stamp": "2021-08-11 00:21:39.115330", "last_clean_scrub_stamp": "2021-08-12 01:00:46.854454", "log_size": 1570, "ondisk_log_size": 1570, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "snaptrimq_len": 0, "stat_sum": { "num_bytes": 5377932020, "num_objects": 1288, "num_object_clones": 1, "num_object_copies": 3864, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 1288, "num_whiteouts": 0, "num_read": 28216747, "num_read_kb": 111077141, "num_write": 17398570, "num_write_kb": 577991459, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, 
"num_deep_scrub_errors": 0, "num_objects_recovered": 1279, "num_bytes_recovered": 5330412276, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0 }, "up": [ 10, 34 ], "acting": [ 10, 34 ], "blocked_by": [], "up_primary": 10, "acting_primary": 10 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 2075, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started/Primary/Active", "enter_time": "2021-08-12 22:37:42.509959", "might_have_unfound": [], "recovery_progress": { "backfill_targets": [], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "803", "scrubber.active": false, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.max_end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2021-08-12 22:37:41.514118" } ], "agent_state": {} }
What should I do? Hope to get your help.
Files
Updated by Neha Ojha over 2 years ago
can you share your osdmap? are all your osds up and in? the crushmap looks fine.
Updated by Ke Xiao over 2 years ago
- File osd.tree.txt osd.tree.txt added
Neha Ojha wrote:
can you share your osdmap? are all your osds up and in? the crushmap looks fine.
All the osds are up and in; this is my osd map.
Updated by Ke Xiao over 2 years ago
Neha Ojha wrote:
can you share your osdmap? are all your osds up and in? the crushmap looks fine.
wish to get your help
Actions