1
|
import json
|
2
|
import logging
|
3
|
import errno
|
4
|
import re
|
5
|
from teuthology.contextutil import MaxWhileTries
|
6
|
from teuthology.exceptions import CommandFailedError
|
7
|
from teuthology.orchestra.run import wait
|
8
|
from tasks.cephfs.fuse_mount import FuseMount
|
9
|
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
|
10
|
|
11
|
# Outcome labels for metadata-damage experiments in TestDamage.  Each
# mutation records an *expected* outcome and the test records the *observed*
# outcome; mismatches fail the test.
DAMAGED_ON_START = "damaged_on_start"  # rank 0 marked damaged during MDS startup
DAMAGED_ON_LS = "damaged_on_ls"        # rank 0 marked damaged only once a client lists files
CRASHED = "server crashed"             # an MDS daemon process terminated unexpectedly
NO_DAMAGE = "no damage"                # daemons healthy and client IO completed normally
READONLY = "readonly"                  # client writes fail with "Read-only file system"
FAILED_CLIENT = "client failed"        # client IO hung or failed in an unexpected way
FAILED_SERVER = "server failed"        # MDS neither became healthy nor was marked damaged

# Client saw EIO on ls/stat (a matching "damage ls" entry is then expected)
EIO_ON_LS = "eio"

# Client saw EIO, but the MDS recorded no corresponding damage-table entry
EIO_NO_DAMAGE = "eio without damage entry"

log = logging.getLogger(__name__)
|
27
|
|
28
|
|
29
|
class TestDamage(CephFSTestCase):
    """
    Exercise the MDS's handling of damaged or missing metadata in the RADOS
    metadata pool: whole-object deletion/corruption/truncation, corrupt omap
    values (dentries) and omap headers, and bad backtraces.  Each kind of
    damage must produce a clean, expected outcome (rank marked damaged, EIO
    to the client, read-only filesystem, ...) rather than a crash.
    """

    def _simple_workload_write(self):
        # Create a small known tree (one dir, one 6MB file) so the metadata
        # pool contains a predictable set of objects to damage.
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        # True if `rank` appears in the MDS map's 'damaged' list.
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Quiesce the filesystem and flush the journal so the metadata pool
        # is in a settled state before we snapshot it with `rados export`.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Pristine copy of the metadata pool; re-imported before each mutation
        # so every experiment starts from the same state.
        self.fs.rados(['export', '/tmp/metadata.bin'])

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            # Of the stray dirfrag objects (60x.00000000), keep only
            # 600.00000000.  NOTE(review): non-raw pattern string; "\." is
            # the escaped literal dot, "." before it matches any digit.
            if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            # Of the strayN_head dentries in object 100.00000000, keep only
            # stray0_head.
            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to?   i.e.
            what should I poke to see EIO after damaging it?
            """
            # The two dentries created by _simple_workload_write map to known
            # paths; anything else falls back to listing the mount root
            # (None -> MetadataMutation defaults ls_path to ".").
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            return None

        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects carrying a non-empty omap header.
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # `rados getomapheader` prints e.g. "header (N bytes) : ..."
            header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all (object, omap key) pairs, filtering redundant strays.
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find all objects with a non-empty data payload.
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        class MetadataMutation(object):
            """
            One way of damaging the metadata pool, paired with the outcome
            (one of the module-level constants) we expect it to produce.
            """
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_          # RADOS object being damaged
                self.desc = desc_              # human-readable description (also the identity key)
                self.mutate_fn = mutate_fn_    # zero-arg callable that inflicts the damage
                self.expectation = expectation_
                # Path for the client to poke after the mutation; "." means
                # recursive ls of the whole mount.
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            # Mutations are used as dict keys in `results`, keyed by their
            # description string.
            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

        # Whole-object deletions.
        for o in objects:
            if o in [
                    # Deleting these particular objects is expected to be
                    # tolerated (NO_DAMAGE); everything else should mark the
                    # rank damaged at startup.
                    "400.00000000",
                    "10000000000.00000000",
                    "500.00000000",
                    "mds0_openfiles.0"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                # Bind o as a default argument to avoid the late-binding
                # closure pitfall inside this loop.
                lambda o=o: self.fs.rados(["rm", o]),
                expectation
            ))

        # Overwrite each data-bearing object's payload with junk.
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # 500.00000000 is special-cased: corrupting it is expected
                # to send the filesystem read-only rather than damaged.
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    DAMAGED_ON_START
                ))

        # Truncate each data-bearing object to zero length.
        for o in data_objects:
            if o == "500.00000000":
                # Truncating 500.00000000 is expected to be tolerated.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    o,
                    "Truncate {0}".format(o),
                    lambda o=o: self.fs.rados(["truncate", o, "0"]),
                    expectation
                ))

        # Corrupt individual omap values (i.e. dentries).
        for o, k in omap_keys:
            if o.startswith("100."):
                # Corrupt omap values on 100.* objects are expected to mark
                # the rank damaged at startup; elsewhere the damage should
                # only surface as EIO when the client touches the path.
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o,k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # Corrupt omap headers.
        for o in omap_header_objs:
            # Stray dirfrags, the root/MDS dirs and the sessionmap are
            # expected to go damaged; other headers should be tolerated.
            if re.match("60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap header on {0}".format(o),
                    lambda o=o: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )

        # Observed outcome per mutation; compared against expectations at the end.
        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state: unmount, stop daemons, clear any damaged flag
            # left over from the previous iteration.
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Restore the pristine metadata pool contents.
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inflict the damage.
            mutation.mutate_fn()

            # Bring the MDS back up and see how it reacts.
            self.fs.mds_restart()

            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # Give it a chance to go damaged before checking; a
                    # timeout here is handled by the combined wait below.
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for the MDS to either go damaged or come up healthy.
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    # Neither happened: check whether any daemon actually
                    # crashed (a distinct, recorded outcome).
                    crashed = False
                    # Daemon handles must be reset (proc = None) so teardown
                    # doesn't try to scrub a dead process.
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None
                    if not crashed:
                        # Daemons alive but stuck: propagate the timeout.
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                # Expectation allows (or requires) healthy startup.
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue
                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up: poke the damaged path from the client.
            self.mount_a.mount_wait()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
                # A read-only filesystem rejects mkdir with EROFS; anything
                # else is unexpected and re-raised.
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() in stderr.lower():
                        pass
                    else:
                        raise
            else:
                # Wait for the client's ls/stat and classify its outcome.
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIO alone isn't enough: the MDS must also have recorded a
                # damage-table entry, otherwise downgrade the result.
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        # Compare observed outcomes against expectations and report.
        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        """
        That a corrupt dentry (junk omap value in a dirfrag object) is hidden
        from listings, reported in the damage table, gives ENOENT/EIO as
        appropriate, is repairable via scrub, and becomes writable again once
        its damage-table entry is removed.
        """
        # Create a dir with two files: one will have its dentry damaged.
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        # Flush everything out and stop the MDS before touching RADOS directly.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Overwrite the _head dentry's omap value in subdir's dirfrag object
        # (object name is the dir ino in hex, frag 00000000) with junk.
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Bring the cluster back up and remount.
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        dentries = self.mount_a.ls("subdir/")

        # The damaged dentry must be hidden from the directory listing.
        self.assertEqual(dentries, ["file_undamaged"])

        # stat on the damaged name gives ENOENT — presumably because the
        # ls above satisfied the lookup from the (now complete) dirfrag;
        # TODO confirm against MDS dentry-damage semantics.
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # Exactly one damage-table entry should have been recorded.
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # Creating the damaged name must fail with EIO while the damage
        # entry exists.
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # stat again: FUSE clients are expected to see EIO; other client
        # types may legitimately report ENOENT instead.
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO])
        else:
            raise AssertionError("Expected EIO")

        # The recursive stats still count both files (damage not yet repaired).
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Repair via scrub: validation should fail on raw stats, which the
        # 'repair' flag then fixes.
        scrub_json = self.fs.rank_tell(["scrub", "start", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # After repair only the undamaged file is counted.
        self.mount_a.mount_wait()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Re-inflict the same dentry damage...
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # ...but this time clear the damage-table entry...
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # ...after which creating the file must succeed.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        # Two files reachable both by their primary paths and by hardlinks:
        # resolving a hardlink forces the MDS to open the target by ino.
        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Flush journal to backing objects, then reset it and restart so the
        # MDS starts cold and must consult backtraces/dirfrags on disk.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()

        # Case 1: corrupt backtrace.
        # Sanity check that file1's backtrace is currently decodable.
        self.fs.read_backtrace(file1_ino)

        # Corrupt it by writing junk into the "parent" xattr on the data object.
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

        # Resolving the hardlink should now fail with EIO on the client.
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # And a matching 'backtrace' damage entry should have been recorded.
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        # Clear the damage table before the next case.
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag.
        # Remove dir2's dirfrag object entirely, so opening file2 by ino
        # (via its hardlink) fails when loading the parent directory.
        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Expect two new damage entries — a backtrace entry for file2 and a
        # dir_frag entry for dir2 — in either order.
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace" :
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        # Clean up the damage table.
        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))
|