Bug #46451

open

RGW multi-site - bug related to orphaned objects in default.rgw.buckets.index on secondary sites

Added by Piotr O almost 4 years ago. Updated almost 4 years ago.

Status:
New
Priority:
Normal
Assignee:
-
Target version:
-
% Done:
0%

Source:
Tags:
Backport:
Regression:
No
Severity:
2 - major
Reviewed:
Affected Versions:
ceph-qa-suite:
rgw
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

To reproduce this case I created two small clusters configured for multi-site replication.

rgw-1: one node cluster (multisite master)
rgw-2: one node cluster (multisite secondary)
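
For reference, the multi-site pairing was created with the standard realm/zonegroup/zone procedure. The sketch below is reconstructed from the realm, zonegroup and zone names visible in the outputs further down; the endpoints, sync user and keys are placeholders, not the exact values used:

# On rgw-1 (master zone) -- sketch only, endpoints/keys are placeholders
radosgw-admin realm create --rgw-realm=gwp --default
radosgw-admin zonegroup create --rgw-zonegroup=dc1_zonegroup --endpoints=http://rgw-1:7081 --master --default
radosgw-admin zone create --rgw-zonegroup=dc1_zonegroup --rgw-zone=dc1_master --endpoints=http://rgw-1:7081 --master --default --access-key=SYNC_ACCESS_KEY --secret=SYNC_SECRET_KEY
radosgw-admin user create --uid=sync-user --display-name="sync user" --system --access-key=SYNC_ACCESS_KEY --secret=SYNC_SECRET_KEY
radosgw-admin period update --commit

# On rgw-2 (secondary zone) -- pull the realm/period, then add the secondary zone
radosgw-admin realm pull --url=http://rgw-1:7081 --access-key=SYNC_ACCESS_KEY --secret=SYNC_SECRET_KEY
radosgw-admin period pull --url=http://rgw-1:7081 --access-key=SYNC_ACCESS_KEY --secret=SYNC_SECRET_KEY
radosgw-admin zone create --rgw-zonegroup=dc1_zonegroup --rgw-zone=dc2_secondary --endpoints=http://rgw-2:7081 --access-key=SYNC_ACCESS_KEY --secret=SYNC_SECRET_KEY
radosgw-admin period update --commit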

Some details below:

[root@rgw-1 ~]# ceph versions
{
"mon": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"mgr": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"osd": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"mds": {},
"rgw": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 2
},
"overall": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 5
}
}
[root@rgw-1 ~]# ceph -s
cluster:
id: 2bd71386-0479-4499-bbd2-c35e8aba7c6d
health: HEALTH_WARN
7 pool(s) have no replicas configured

services:
mon: 1 daemons, quorum rgw-1 (age 11m)
mgr: rgw-1(active, since 10m)
osd: 1 osds: 1 up (since 10m), 1 in (since 9d)
rgw: 2 daemons active (rgw01, rgw02)
task status:
data:
pools: 7 pools, 224 pgs
objects: 3.28k objects, 27 KiB
usage: 6.2 GiB used, 29 GiB / 35 GiB avail
pgs: 224 active+clean
io:
client: 39 KiB/s rd, 0 B/s wr, 38 op/s rd, 10 op/s wr

[root@rgw-1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.03419 root default
-3 0.03419 host rgw-1
0 hdd 0.03419 osd.0 up 1.00000 1.00000

[root@rgw-1 ~]# cat /etc/ceph/ceph.conf
[global]
fsid = 2bd71386-0479-4499-bbd2-c35e8aba7c6c
mon_initial_members = rgw-1
mon_host = 10.32.37.140
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

osd crush chooseleaf type = 0
osd pool default size = 1
osd pool default min size = 1

rgw sync log trim concurrent buckets = 128
rgw sync log trim max buckets = 512
rgw sync log trim interval = 60
rgw sync log trim min cold buckets = 64

rgw override bucket index max shards = 32

[client.rgw.rgw01]
rgw frontends = beast port=7081
rgw run sync thread = true

[client.rgw.rgw02]
rgw frontends = beast port=7082
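
To confirm that the per-daemon options above (for example rgw_override_bucket_index_max_shards and rgw_run_sync_thread) are actually in effect, the running values can be queried over the admin socket. A small sketch; the socket name varies by deployment, so check /var/run/ceph/ for the actual path:

# Query the live value of an option on the running RGW daemon (adjust the asok path to match the host)
ceph daemon /var/run/ceph/ceph-client.rgw.rgw01.asok config get rgw_override_bucket_index_max_shards
ceph daemon /var/run/ceph/ceph-client.rgw.rgw01.asok config get rgw_run_sync_thread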

[root@rgw-1 ~]# radosgw-admin sync status
realm 9d14cfa1-70b2-4ffa-87c0-2927d5a619db (gwp)
zonegroup d20dbb28-cf27-4954-b536-1e1007b652d9 (dc1_zonegroup)
zone e6f72dad-2d54-4bea-9236-6f95964a8694 (dc1_master)
metadata sync no sync (zone is master)
data sync source: 2a863b06-5444-49d1-9865-b372a91ba5c6 (dc2_secondary)
syncing
full sync: 0/128 shards
incremental sync: 128/128 shards
data is caught up with source

[root@rgw-1 ~]# radosgw-admin bucket list
[
"test"
]
[root@rgw-1 ~]# radosgw-admin bucket stats --bucket test
{
"bucket": "test",
"num_shards": 8,
"tenant": "",
"zonegroup": "d20dbb28-cf27-4954-b536-1e1007b652d9",
"placement_rule": "default-placement",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
},
"id": "e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1",
"marker": "e6f72dad-2d54-4bea-9236-6f95964a8694.4827.1",
"index_type": "Normal",
"owner": "user",
"ver": "0#222,1#210,2#210,3#208,4#218,5#212,6#208,7#198",
"master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0",
"mtime": "2020-06-29 11:54:02.827001Z",
"max_marker":"0#00000000221.335.5,1#00000000209.323.5,2#00000000209.331.5,3#00000000207.302.5,4#00000000217.319.5,5#00000000211.309.5,6#00000000207.311.5,7#00000000197.327.5",
"usage": {
"rgw.main": {
"size": 3146,
"size_actual": 6443008,
"size_utilized": 3146,
"size_kb": 4,
"size_kb_actual": 6292,
"size_kb_utilized": 4,
"num_objects": 1573
}
},
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
}
}
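
Each of the 8 index shards is a RADOS object in default.rgw.buckets.index holding the bucket's directory entries as omap keys, so the per-shard entry counts can be checked directly. A small sketch using the bucket instance id from the stats above:

# Count omap entries in index shard 0 of the bucket instance listed above
rados -p default.rgw.buckets.index listomapkeys .dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.0 | wc -l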

[root@rgw-1 ~]# rados -p default.rgw.buckets.index ls
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.0
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.3
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.1
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.5
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.7
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.4
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.6
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.2

[root@rgw-1 ~]# rados -p default.rgw.buckets.index ls |wc -l
8

[root@rgw-1 ~]# radosgw-admin mdlog list
No --period given, using current period=c95c3f1b-b77e-4818-8174-4497357c1c22
[]

[root@rgw-2 ~]# ceph versions
{
"mon": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"mgr": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"osd": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 1
},
"mds": {},
"rgw": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 2
},
"overall": {
"ceph version 14.2.10 (b340acf629a010a74d90da5782a2c5fe0b54ac20) nautilus (stable)": 5
}
}
[root@rgw-2 ~]# ceph -s
cluster:
id: 2bd71386-0479-4499-bbd2-c35e8aba7c6d
health: HEALTH_WARN
6 pool(s) have no replicas configured
1 daemons have recently crashed

services:
mon: 1 daemons, quorum rgw-2 (age 22h)
mgr: rgw-2(active, since 22h)
osd: 1 osds: 1 up (since 22h), 1 in (since 9d)
rgw: 2 daemons active (rgw01, rgw02)
task status:
data:
pools: 6 pools, 192 pgs
objects: 3.25k objects, 22 KiB
usage: 1.2 GiB used, 29 GiB / 30 GiB avail
pgs: 192 active+clean
io:
client: 18 KiB/s rd, 18 op/s rd, 0 op/s wr

[root@rgw-2 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02930 root default
-3 0.02930 host rgw-2
0 hdd 0.02930 osd.0 up 1.00000 1.00000

[root@rgw-2 ~]# cat /etc/ceph/ceph.conf
[global]
fsid = 2bd71386-0479-4499-bbd2-c35e8aba7c6d
mon_initial_members = rgw-2
mon_host = 10.32.37.141
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

osd crush chooseleaf type = 0
osd pool default size = 1
osd pool default min size = 1

rgw sync log trim concurrent buckets = 128
rgw sync log trim max buckets = 512
rgw sync log trim interval = 60
rgw sync log trim min cold buckets = 64

rgw override bucket index max shards = 32

[client.rgw.rgw01]
rgw frontends = beast port=7081
rgw run sync thread = true

[client.rgw.rgw02]
rgw frontends = beast port=7082

[root@rgw-2 ~]# radosgw-admin sync status
realm 9d14cfa1-70b2-4ffa-87c0-2927d5a619db (gwp)
zonegroup d20dbb28-cf27-4954-b536-1e1007b652d9 (dc1_zonegroup)
zone 2a863b06-5444-49d1-9865-b372a91ba5c6 (dc2_secondary)
metadata sync syncing
full sync: 0/64 shards
incremental sync: 64/64 shards
metadata is caught up with master
data sync source: e6f72dad-2d54-4bea-9236-6f95964a8694 (dc1_master)
syncing
full sync: 0/128 shards
incremental sync: 128/128 shards
data is caught up with source

[root@rgw-2 ~]# radosgw-admin bucket list
[
"test"
]
[root@rgw-2 ~]# radosgw-admin bucket stats --bucket test
{
"bucket": "test",
"num_shards": 8,
"tenant": "",
"zonegroup": "d20dbb28-cf27-4954-b536-1e1007b652d9",
"placement_rule": "default-placement",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
},
"id": "e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1",
"marker": "e6f72dad-2d54-4bea-9236-6f95964a8694.4827.1",
"index_type": "Normal",
"owner": "user",
"ver": "0#398,1#382,2#389,3#410,4#413,5#390,6#403,7#380",
"master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0",
"mtime": "2020-06-29 11:54:02.827001Z",
"max_marker":"0#00000000397.409.5,1#00000000381.399.5,2#00000000388.404.5,3#00000000409.426.5,4#00000000412.425.5,5#00000000389.408.5,6#00000000402.413.5,7#00000000379.389.5",
"usage": {
"rgw.main": {
"size": 3146,
"size_actual": 6443008,
"size_utilized": 3146,
"size_kb": 4,
"size_kb_actual": 6292,
"size_kb_utilized": 4,
"num_objects": 1573
}
},
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
}
}

[root@rgw-2 ~]# rados -p default.rgw.buckets.index ls
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.0
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.3
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.1
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.5
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.7
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.4
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.6
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.2

[root@rgw-2 ~]# rados -p default.rgw.buckets.index ls |wc -l
8

[root@rgw-2 ~]# radosgw-admin mdlog list
No --period given, using current period=c95c3f1b-b77e-4818-8174-4497357c1c22
[]

At this point both clusters are healthy and the replication between them is in good shape.

So, let's try to break it.

Run the following command (it uploads 1000 objects using rclone's "copyto" function):
posinski@po:~/s3cfg/dev/rclone-v1.51.0-linux-amd64 $ for i in `seq 1 1000`; do ./rclone --config rclone.conf copyto ./git-log.txt ceph:test/test-$i; done

After some time, all objects are successfully uploaded.

posinski@po:~/s3cfg/dev/rclone-v1.51.0-linux-amd64 $ s3cmd -c ../.s3cfg_dc1 ls "s3://test/test*"|wc -l
1000

posinski@po:~/s3cfg/dev/rclone-v1.51.0-linux-amd64 $ s3cmd -c ../.s3cfg_dc2 ls "s3://test/test*"|wc -l
1000
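
The same count can be cross-checked from the RGW side on either cluster (a small sketch, assuming jq is installed):

# Number of objects the bucket index reports for the "test" bucket
radosgw-admin bucket stats --bucket test | jq '.usage."rgw.main".num_objects'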

Now look at the default.rgw.buckets.index pool on both sites:

[root@rgw-1 ~]# rados -p default.rgw.buckets.index ls
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.0
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.3
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.1
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.5
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.7
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.4
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.6
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.2

[root@rgw-2 ~]# rados -p default.rgw.buckets.index ls
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.22
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.14
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.1
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.11
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.4
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.31
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.0
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.0
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.30
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.10
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.3
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.1
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.17
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.21
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.12
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.5
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.29
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.20
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.28
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.8
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.7
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.26
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.23
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.19
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.27
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.4
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.24
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.6
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.15
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.7
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.23236.1.2
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.9
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.25
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.16
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.5
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.2
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.18
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.13
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.6
.dir.e6f72dad-2d54-4bea-9236-6f95964a8694.135545.384.3

On the secondary site, many weird, orphaned objects appear in the default.rgw.buckets.index pool.
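
Since the index shard objects for a bucket instance are normally named .dir.<bucket_instance_id>.<shard_number>, the unexpected ones can be spotted by deriving the expected names from bucket stats and diffing them against the pool contents. A minimal sketch, assuming jq is installed:

# Build the list of index objects that should exist for bucket "test"
# and compare it with what is actually present in the index pool.
ID=$(radosgw-admin bucket stats --bucket test | jq -r '.id')
SHARDS=$(radosgw-admin bucket stats --bucket test | jq -r '.num_shards')
for s in $(seq 0 $((SHARDS - 1))); do echo ".dir.${ID}.${s}"; done | sort > /tmp/expected
rados -p default.rgw.buckets.index ls | sort > /tmp/actual
comm -13 /tmp/expected /tmp/actual   # anything printed here does not belong to the current bucket instance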

Sometimes the mdlogs on the master zone related to these operations are also not trimmed, but that case is harder to reproduce.
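
When that happens, the state of the metadata log on the master zone can be inspected and, if really necessary, trimmed by hand. A hedged sketch; the trim flags are from Nautilus-era usage and should be verified against radosgw-admin help on the affected build before running:

# Inspect metadata log status and entries for the current period
radosgw-admin mdlog status
radosgw-admin mdlog list --period=c95c3f1b-b77e-4818-8174-4497357c1c22
# Manual trim of a single shard up to a marker taken from the list output
# (left commented out on purpose -- double-check the syntax first):
# radosgw-admin mdlog trim --period=c95c3f1b-b77e-4818-8174-4497357c1c22 --shard-id=0 --end-marker=<marker>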

I tested this on 14.2.9 and 14.2.10.


Files

copyto.pcap (5.03 KB) - Piotr O, 07/23/2020 08:47 PM