Support #36427
closed
Ceph mds is stuck in creating status
0%
Description
I previously deployed a Ceph cluster with 16 OSDs and created CephFS on it successfully.
However, after a crash caused by MDS slow requests, whenever I create CephFS the Ceph MDS enters the creating state and never leaves it.
Judging from the Ceph status, I don't see any other problem. Here is the output of 'ceph -s':
csl@hpc1:~/iodc$ ceph -s
cluster:
id: 1a32c483-cb2e-4ab3-ac60-02966a8fd327
health: HEALTH_OK
services:
mon: 1 daemons, quorum hpc1
mgr: hpc1(active)
mds: cephfs-1/1/1 up {0=hpc1=up:creating}
osd: 16 osds: 16 up, 16 in
data:
pools: 2 pools, 640 pgs
objects: 7 objects, 124B
usage: 34.3GiB used, 116TiB / 116TiB avail
pgs: 640 active+clean
However, CephFS still works when the cluster has only 8 OSDs.
If you have any idea what could cause this behavior, please let me know. Thank you.
PS. I attached my ceph.conf contents:
[global]
fsid = 1a32c483-cb2e-4ab3-ac60-02966a8fd327
mon_initial_members = hpc1
mon_host = 192.168.40.10
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public_network = 192.168.40.0/24
cluster_network = 192.168.40.0/24
[osd]
osd journal size = 1024
osd max object name len = 256
osd max object namespace len = 64
osd mount options f2fs = active_logs=2
[osd.0]
host = hpc9
public_addr = 192.168.40.18
cluster_addr = 192.168.40.18
[osd.1]
host = hpc10
public_addr = 192.168.40.19
cluster_addr = 192.168.40.19
[osd.2]
host = hpc9
public_addr = 192.168.40.18
cluster_addr = 192.168.40.18
[osd.3]
host = hpc10
public_addr = 192.168.40.19
cluster_addr = 192.168.40.19
[osd.4]
host = hpc9
public_addr = 192.168.40.18
cluster_addr = 192.168.40.18
[osd.5]
host = hpc10
public_addr = 192.168.40.19
cluster_addr = 192.168.40.19
[osd.6]
host = hpc9
public_addr = 192.168.40.18
cluster_addr = 192.168.40.18
[osd.7]
host = hpc10
public_addr = 192.168.40.19
cluster_addr = 192.168.40.19
[osd.8]
host = hpc9
public_addr = 192.168.40.18
cluster_addr = 192.168.40.18
[osd.9]
host = hpc10
public_addr = 192.168.40.19
cluster_addr = 192.168.40.19
[osd.10]
host = hpc9
public_addr = 192.168.10.18
cluster_addr = 192.168.40.18
[osd.11]
host = hpc10
public_addr = 192.168.10.19
cluster_addr = 192.168.40.19
[osd.12]
host = hpc9
public_addr = 192.168.10.18
cluster_addr = 192.168.40.18
[osd.13]
host = hpc10
public_addr = 192.168.10.19
cluster_addr = 192.168.40.19
[osd.14]
host = hpc9
public_addr = 192.168.10.18
cluster_addr = 192.168.40.18
[osd.15]
host = hpc10
public_addr = 192.168.10.19
cluster_addr = 192.168.40.19
Updated by Zheng Yan over 5 years ago
What is the output of 'ceph daemon mds.hpc1 objecter_requests'?
Updated by Kisik Jeong over 5 years ago
Here is the output of the command:
csl@hpc1:~$ sudo ceph daemon mds.hpc1 objecter_requests
{
"ops": [
{
"tid": 34,
"pg": "2.95e3ab",
"osd": 10,
"object_id": "200.00000003",
"object_locator": "@2",
"target_object_id": "200.00000003",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.314018s",
"osd_ops": [
"delete"
]
},
{
"tid": 2,
"pg": "2.64e96f8f",
"osd": 11,
"object_id": "400.00000000",
"object_locator": "@2",
"target_object_id": "400.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.310709s",
"osd_ops": [
"writefull 0~22"
]
},
{
"tid": 31,
"pg": "2.d90270ad",
"osd": 11,
"object_id": "mds_snaptable",
"object_locator": "@2",
"target_object_id": "mds_snaptable",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312639s",
"osd_ops": [
"writefull 0~46"
]
},
{
"tid": 3,
"pg": "2.6b2cdaff",
"osd": 13,
"object_id": "1.00000000",
"object_locator": "@2",
"target_object_id": "1.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.310902s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 4,
"pg": "2.6b2cdaff",
"osd": 13,
"object_id": "1.00000000",
"object_locator": "@2",
"target_object_id": "1.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.310978s",
"osd_ops": [
"create",
"setxattr parent (30)",
"setxattr layout (30)"
]
},
{
"tid": 6,
"pg": "2.c2e541b0",
"osd": 13,
"object_id": "600.00000000",
"object_locator": "@2",
"target_object_id": "600.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311147s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 7,
"pg": "2.c2e541b0",
"osd": 13,
"object_id": "600.00000000",
"object_locator": "@2",
"target_object_id": "600.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311219s",
"osd_ops": [
"create",
"setxattr parent (62)",
"setxattr layout (30)"
]
},
{
"tid": 12,
"pg": "2.7a4d91b0",
"osd": 13,
"object_id": "603.00000000",
"object_locator": "@2",
"target_object_id": "603.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311556s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 13,
"pg": "2.7a4d91b0",
"osd": 13,
"object_id": "603.00000000",
"object_locator": "@2",
"target_object_id": "603.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.31163s",
"osd_ops": [
"create",
"setxattr parent (62)",
"setxattr layout (30)"
]
},
{
"tid": 18,
"pg": "2.f89eaaf4",
"osd": 13,
"object_id": "606.00000000",
"object_locator": "@2",
"target_object_id": "606.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311899s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 19,
"pg": "2.f89eaaf4",
"osd": 13,
"object_id": "606.00000000",
"object_locator": "@2",
"target_object_id": "606.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311946s",
"osd_ops": [
"create",
"setxattr parent (62)",
"setxattr layout (30)"
]
},
{
"tid": 27,
"pg": "2.85dde07f",
"osd": 13,
"object_id": "100.00000000.inode",
"object_locator": "@2",
"target_object_id": "100.00000000.inode",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312464s",
"osd_ops": [
"writefull 0~536"
]
},
{
"tid": 29,
"pg": "2.3270c60b",
"osd": 13,
"object_id": "mds0_sessionmap",
"object_locator": "@2",
"target_object_id": "mds0_sessionmap",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312547s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 32,
"pg": "2.6e5f474",
"osd": 13,
"object_id": "200.00000001",
"object_locator": "@2",
"target_object_id": "200.00000001",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312752s",
"osd_ops": [
"delete"
]
},
{
"tid": 33,
"pg": "2.eb272dbb",
"osd": 13,
"object_id": "200.00000002",
"object_locator": "@2",
"target_object_id": "200.00000002",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.313998s",
"osd_ops": [
"delete"
]
},
{
"tid": 20,
"pg": "2.bb590b7c",
"osd": 14,
"object_id": "607.00000000",
"object_locator": "@2",
"target_object_id": "607.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312018s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 21,
"pg": "2.bb590b7c",
"osd": 14,
"object_id": "607.00000000",
"object_locator": "@2",
"target_object_id": "607.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312066s",
"osd_ops": [
"create",
"setxattr parent (62)",
"setxattr layout (30)"
]
},
{
"tid": 1,
"pg": "2.844f3494",
"osd": 15,
"object_id": "200.00000000",
"object_locator": "@2",
"target_object_id": "200.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.310657s",
"osd_ops": [
"writefull 0~90"
]
},
{
"tid": 5,
"pg": "2.232c0e14",
"osd": 15,
"object_id": "1.00000000.inode",
"object_locator": "@2",
"target_object_id": "1.00000000.inode",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.311024s",
"osd_ops": [
"writefull 0~536"
]
},
{
"tid": 24,
"pg": "2.60b82d07",
"osd": 15,
"object_id": "609.00000000",
"object_locator": "@2",
"target_object_id": "609.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312212s",
"osd_ops": [
"omap-set-header"
]
},
{
"tid": 25,
"pg": "2.60b82d07",
"osd": 15,
"object_id": "609.00000000",
"object_locator": "@2",
"target_object_id": "609.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.31225s",
"osd_ops": [
"create",
"setxattr parent (62)",
"setxattr layout (30)"
]
},
{
"tid": 26,
"pg": "2.c5265ab3",
"osd": 15,
"object_id": "100.00000000",
"object_locator": "@2",
"target_object_id": "100.00000000",
"target_object_locator": "@2",
"paused": 0,
"used_replica": 0,
"precalc_pgid": 0,
"last_sent": "149509s",
"attempts": 1,
"snapid": "head",
"snap_context": "0=[]",
"mtime": "2018-10-15 17:28:50.0.312411s",
"osd_ops": [
"omap-set-header",
"omap-set-vals"
]
}
],
"linger_ops": [],
"pool_ops": [],
"pool_stat_ops": [],
"statfs_ops": [],
"command_ops": []
}
Zheng Yan wrote:
What is the output of 'ceph daemon mds.hpc1 objecter_requests'?
Updated by Zheng Yan over 5 years ago
Looks like a RADOS issue. Try restarting the MDS.
Updated by Patrick Donnelly over 5 years ago
- Tracker changed from Bug to Support
- Status changed from New to Rejected
- Target version deleted (v12.2.9)
- Start date deleted (10/15/2018)
Please take this question to the ceph-users list where you'll receive more eyes to help diagnose the issue. Once we're certain there's a bug, we can open a tracker ticket.