Project

General

Profile

Actions

Bug #6681

closed

osd recovery hung

Added by Samuel Just over 10 years ago. Updated about 10 years ago.

Status:
Resolved
Priority:
Urgent
Assignee:
Category:
-
Target version:
-
% Done:

0%

Source:
Q/A
Tags:
Backport:
dumpling
Regression:
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

ubuntu@teuthology:/a/teuthology-2013-10-29_10:45:19-rados-next-testing-basic-plana

ubuntu@plana31:~$ sudo ceph -s
cluster aff625a2-8a2f-4cb5-9f70-923d40fa0dba
health HEALTH_WARN 3 pgs recovering; 3 pgs stuck unclean; 24 requests are blocked > 32 sec; recovery 861/49076 objects degraded (1.754%); 326/24538 unfound (1.329%); pool data pg_num 64 > pgp_num 54; pool metadata pg_num 34 > pgp_num 24; pool rbd pg_num 34 > pgp_num 24
monmap e1: 3 mons at {a=10.214.131.9:6789/0,b=10.214.132.11:6789/0,c=10.214.131.9:6790/0}, election epoch 6, quorum 0,1,2 a,b,c
mdsmap e5: 1/1/1 up {0=a=up:active}
osdmap e284: 6 osds: 6 up, 4 in
pgmap v1902: 132 pgs, 3 pools, 98032 MB data, 24538 objects
114 GB used, 1745 GB / 1863 GB avail
861/49076 objects degraded (1.754%); 326/24538 unfound (1.329%)
129 active+clean
3 active+recovering

ubuntu@plana31:~$ sudo ceph pg dump | grep recovering
dumped all in format plain
0.3a 500 169 395 169 2097152000 532 532 active+recovering 2013-10-29 12:02:07.668419 207'532 284:265 [4,0] [4,0] 0'0 2013-10-29 11:43:09.707243 0'0 2013-10-29 11:43:09.707243
0.37 404 156 382 156 1694498816 410 410 active+recovering 2013-10-29 12:02:25.523768 203'410 284:259 [5,1] [5,1] 0'0 2013-10-29 11:43:08.645224 0'0 2013-10-29 11:43:08.645224
0.36 383 1 84 1 1602224140 403 403 active+recovering 2013-10-29 12:02:35.302149 219'403 284:585 [5,1] [5,1] 0'0 2013-10-29 11:43:08.644738 0'0 2013-10-29 11:43:08.644738

{ "state": "active+recovering",
"epoch": 284,
"up": [
4,
0],
"acting": [
4,
0],
"info": { "pgid": "0.3a",
"last_update": "207'532",
"last_complete": "11'50",
"log_tail": "0'0",
"last_user_version": 532,
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 163,
"last_epoch_clean": 95,
"last_epoch_split": 0,
"same_up_since": 162,
"same_interval_since": 162,
"same_primary_since": 162,
"last_scrub": "0'0",
"last_scrub_stamp": "2013-10-29 11:43:09.707243",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "2013-10-29 11:43:09.707243",
"last_clean_scrub_stamp": "0.000000"},
"stats": { "version": "207'532",
"reported_seq": "265",
"reported_epoch": "284",
"state": "active+recovering",
"last_fresh": "2013-10-29 12:27:49.857720",
"last_change": "2013-10-29 12:02:07.668419",
"last_active": "2013-10-29 12:27:49.857720",
"last_clean": "2013-10-29 12:00:06.712576",
"last_became_active": "0.000000",
"last_unstale": "2013-10-29 12:27:49.857720",
"mapping_epoch": 160,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 1,
"last_epoch_clean": 95,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "2013-10-29 11:43:09.707243",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "2013-10-29 11:43:09.707243",
"last_clean_scrub_stamp": "0.000000",
"log_size": 532,
"ondisk_log_size": 532,
"stats_invalid": "1",
"stat_sum": { "num_bytes": 2097152000,
"num_objects": 500,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 532,
"num_write_kb": 2113536,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 298,
"num_bytes_recovered": 1249902592,
"num_keys_recovered": 0},
"stat_cat_sum": {},
"up": [
4,
0],
"acting": [
4,
0]},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 163},
"recovery_state": [ { "name": "Started\/Primary\/Active",
"enter_time": "2013-10-29 12:02:07.476638",
"might_have_unfound": [ { "osd": 0,
"status": "already probed"}, { "osd": 1,
"status": "already probed"}, { "osd": 2,
"status": "querying"}, { "osd": 3,
"status": "querying"}, { "osd": 5,
"status": "already probed"}],
"recovery_progress": { "backfill_target": -1,
"waiting_on_backfill": 0,
"last_backfill_started": "0\/\/0\/\/-1",
"backfill_info": { "begin": "0\/\/0\/\/-1",
"end": "0\/\/0\/\/-1",
"objects": []},
"peer_backfill_info": { "begin": "0\/\/0\/\/-1",
"end": "0\/\/0\/\/-1",
"objects": []},
"backfills_in_flight": [],
"recovering": [],
"pg_backend": { "pull_from_peer": [],
"pushing": []}},
"scrub": { "scrubber.epoch_start": "0",
"scrubber.active": 0,
"scrubber.block_writes": 0,
"scrubber.finalizing": 0,
"scrubber.waiting_on": 0,
"scrubber.waiting_on_whom": []}}, { "name": "Started",
"enter_time": "2013-10-29 12:02:06.321424"}]}

Actions

Also available in: Atom PDF