Bug #53030
rgw nfs export at user-level crash on readdir
0%
Description
Oct 25 10:45:37 dael conmon[2737231]: *** Caught signal (Segmentation fault) ** Oct 25 10:45:37 dael conmon[2737231]: in thread 7f41bc555700 thread_name:ganesha.nfsd Oct 25 10:45:37 dael conmon[2737231]: ceph version 17.0.0-7183-g54142666 (54142666e5705ced88e3e2d91ddc0ff29867a362) quincy (dev) Oct 25 10:45:37 dael conmon[2737231]: 1: /lib64/libpthread.so.0(+0x12b20) [0x7f4219fceb20] Oct 25 10:45:37 dael conmon[2737231]: 2: (rgw::ARN::ARN(rgw_bucket const&)+0x42) [0x7f41daf97262] Oct 25 10:45:37 dael conmon[2737231]: 3: (verify_bucket_permission(DoutPrefixProvider const*, perm_state_base*, rgw_bucket const&, RGWAccessControlPolicy*, RGWAccessControlPolicy*, boost::optional<rgw::IAM::Policy> const&, std::vector<rgw::IAM::Policy, std::allocator<rgw::IAM::Policy> > const&, std::vector<rgw::IAM::Policy, std::allocator<rgw::IAM::Policy> > const&, unsigned long)+0xa2) [0x7f41dab95442] Oct 25 10:45:37 dael conmon[2737231]: 4: (verify_bucket_permission(DoutPrefixProvider const*, req_state*, unsigned long)+0x83) [0x7f41dab96283] Oct 25 10:45:37 dael conmon[2737231]: 5: (RGWListBucket::verify_permission(optional_yield)+0x12e) [0x7f41dac6079e] Oct 25 10:45:37 dael conmon[2737231]: 6: (rgw::RGWLibProcess::process_request(rgw::RGWLibRequest*, rgw::RGWLibIO*)+0xd85) [0x7f41daada955] Oct 25 10:45:37 dael conmon[2737231]: 7: (rgw::RGWLibProcess::process_request(rgw::RGWLibRequest*)+0x49) [0x7f41daadbaf9] Oct 25 10:45:37 dael conmon[2737231]: 8: (rgw::RGWFileHandle::readdir(bool (*)(char const*, void*, unsigned long, stat*, unsigned int, unsigned int), void*, boost::variant<unsigned long*, char const*>, bool*, unsigned int)+0x184) [0x7f41daaf02d4] Oct 25 10:45:37 dael conmon[2737231]: 9: rgw_readdir2() Oct 25 10:45:37 dael conmon[2737231]: 10: /usr/lib64/ganesha/libfsalrgw.so(+0x5100) [0x7f41fc3e9100] Oct 25 10:45:37 dael conmon[2737231]: 11: /lib64/libganesha_nfsd.so.3.5(+0x1278f7) [0x7f421bd718f7] Oct 25 10:45:37 dael conmon[2737231]: 12: /lib64/libganesha_nfsd.so.3.5(+0x129173) 
[0x7f421bd73173] Oct 25 10:45:37 dael conmon[2737231]: 13: fsal_readdir() Oct 25 10:45:37 dael conmon[2737231]: 14: /lib64/libganesha_nfsd.so.3.5(+0xe7d95) [0x7f421bd31d95] Oct 25 10:45:37 dael conmon[2737231]: 15: /lib64/libganesha_nfsd.so.3.5(+0xcf78f) [0x7f421bd1978f] Oct 25 10:45:37 dael conmon[2737231]: 16: /lib64/libganesha_nfsd.so.3.5(+0xd0927) [0x7f421bd1a927] Oct 25 10:45:37 dael conmon[2737231]: 17: /lib64/libganesha_nfsd.so.3.5(+0x50c46) [0x7f421bc9ac46] Oct 25 10:45:37 dael conmon[2737231]: 18: /lib64/libntirpc.so.3.5(+0x25800) [0x7f421ba28800] Oct 25 10:45:37 dael conmon[2737231]: 19: /lib64/libntirpc.so.3.5(+0x22bf9) [0x7f421ba25bf9] Oct 25 10:45:37 dael conmon[2737231]: 20: /lib64/libntirpc.so.3.5(+0x235d8) [0x7f421ba265d8] Oct 25 10:45:37 dael conmon[2737231]: 21: /lib64/libntirpc.so.3.5(+0x2e65d) [0x7f421ba3165d] Oct 25 10:45:37 dael conmon[2737231]: 22: /lib64/libpthread.so.0(+0x814a) [0x7f4219fc414a] Oct 25 10:45:37 dael conmon[2737231]: 23: clone()
triggered with this export
EXPORT {
FSAL {
name = "RGW";
user_id = "sage";
access_key_id = "5MRIMLJVB1ODIEREBD08";
secret_access_key = "TlfACgAqHZclGlAZbF3aoRof8ibkDqsiLYh0ADV3";
}
export_id = 2;
path = "";
pseudo = "/sage";
access_type = "RW";
squash = "none";
attr_expiration_time = 0;
security_label = true;
protocols = 4;
transports = "TCP";
}
and user
{
"user_id": "sage",
"display_name": "sage weil",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"subusers": [],
"keys": [
{
"user": "sage",
"access_key": "5MRIMLJVB1ODIEREBD08",
"secret_key": "TlfACgAqHZclGlAZbF3aoRof8ibkDqsiLYh0ADV3"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"default_storage_class": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"temp_url_keys": [],
"type": "rgw",
"mfa_ids": []
}
Related issues
History
#1 Updated by Casey Bodley almost 2 years ago
- Assignee set to Matt Benjamin
- Tags set to nfs
#2 Updated by Sage Weil almost 2 years ago
- Assignee deleted (Matt Benjamin)
- Tags deleted (nfs)
I'm reproducing this with this branch: https://github.com/liewegas/ceph/tree/nfs-rgw-by-user
bin/radosgw-admin user create --uid sage --display-name 'sage weil'
bin/ceph nfs cluster create foo --port 12345
bin/ceph nfs export create rgw foo /sage --user-id sage
then on another host,
mount -t nfs <ip>:/sage /mnt/foo -o port=12345
ls /mnt/foo # triggers the crash
#3 Updated by Sage Weil almost 2 years ago
same crash on pacific:
Oct 27 11:31:33 dael conmon[1881242]: *** Caught signal (Segmentation fault) ** Oct 27 11:31:33 dael conmon[1881242]: in thread 7f84b85f6700 thread_name:ganesha.nfsd Oct 27 11:31:33 dael conmon[1881242]: ceph version 16.2.6-242-g9c8bdbc8 (9c8bdbc8afe90dacd18f2ee15044f48109538abe) pacific (stable) Oct 27 11:31:33 dael conmon[1881242]: 1: /lib64/libpthread.so.0(+0x12b20) [0x7f852ad6eb20] Oct 27 11:31:33 dael conmon[1881242]: 2: (rgw::ARN::ARN(rgw_bucket const&)+0x42) [0x7f84ec9a04d2] Oct 27 11:31:33 dael conmon[1881242]: 3: (verify_bucket_permission(DoutPrefixProvider const*, perm_state_base*, rgw_bucket const&, RGWAccessControlPolicy*, RGWAccessControlPolicy*, boost::optional<rgw::IAM::Policy> const&, std::vector<rgw::IAM::Policy, std::allocator<rgw::IAM::Policy> > const&, std::vector<rgw::IAM::Policy, std::allocator<rgw::IAM::Policy> > const&, unsigned long)+0xa2) [0x7f84ec5e4a32] Oct 27 11:31:33 dael conmon[1881242]: 4: (verify_bucket_permission(DoutPrefixProvider const*, req_state*, unsigned long)+0x83) [0x7f84ec5e5863] Oct 27 11:31:33 dael conmon[1881242]: 5: (RGWListBucket::verify_permission(optional_yield)+0x12e) [0x7f84ec6a568e] Oct 27 11:31:33 dael conmon[1881242]: 6: (rgw::RGWLibProcess::process_request(rgw::RGWLibRequest*, rgw::RGWLibIO*)+0xd66) [0x7f84ec504b86] Oct 27 11:31:33 dael conmon[1881242]: 7: (rgw::RGWLibProcess::process_request(rgw::RGWLibRequest*)+0x49) [0x7f84ec505d09] Oct 27 11:31:33 dael conmon[1881242]: 8: (rgw::RGWFileHandle::readdir(bool (*)(char const*, void*, unsigned long, stat*, unsigned int, unsigned int), void*, boost::variant<unsigned long*, char const*>, bool*, unsigned int)+0x184) [0x7f84ec51b2a4] Oct 27 11:31:33 dael conmon[1881242]: 9: rgw_readdir2() Oct 27 11:31:33 dael conmon[1881242]: 10: /usr/lib64/ganesha/libfsalrgw.so(+0x629c) [0x7f850c3eb29c] Oct 27 11:31:33 dael conmon[1881242]: 11: /lib64/libganesha_nfsd.so.3.5(+0x11fd87) [0x7f852c8fcd87] Oct 27 11:31:33 dael conmon[1881242]: 12: 
/lib64/libganesha_nfsd.so.3.5(+0x1215b3) [0x7f852c8fe5b3] Oct 27 11:31:33 dael conmon[1881242]: 13: fsal_readdir() Oct 27 11:31:33 dael conmon[1881242]: 14: /lib64/libganesha_nfsd.so.3.5(+0xe2b22) [0x7f852c8bfb22] Oct 27 11:31:33 dael conmon[1881242]: 15: /lib64/libganesha_nfsd.so.3.5(+0xcacaf) [0x7f852c8a7caf] Oct 27 11:31:33 dael conmon[1881242]: 16: /lib64/libganesha_nfsd.so.3.5(+0xcbdf7) [0x7f852c8a8df7] Oct 27 11:31:33 dael conmon[1881242]: 17: /lib64/libganesha_nfsd.so.3.5(+0x4d116) [0x7f852c82a116] Oct 27 11:31:33 dael conmon[1881242]: 18: /lib64/libntirpc.so.3.4(+0x22efc) [0x7f852c5bcefc] Oct 27 11:31:33 dael conmon[1881242]: 19: /lib64/libntirpc.so.3.4(+0x20301) [0x7f852c5ba301] Oct 27 11:31:33 dael conmon[1881242]: 20: /lib64/libntirpc.so.3.4(+0x20db8) [0x7f852c5badb8] Oct 27 11:31:33 dael conmon[1881242]: 21: /lib64/libntirpc.so.3.4(+0x2b584) [0x7f852c5c5584] Oct 27 11:31:33 dael conmon[1881242]: 22: /lib64/libpthread.so.0(+0x814a) [0x7f852ad6414a] Oct 27 11:31:33 dael conmon[1881242]: 23: clone()
If I try the octopus container, I get EACCES from getdents64.
#4 Updated by Sage Weil almost 2 years ago
- Backport set to pacific
#5 Updated by Casey Bodley almost 2 years ago
- Assignee set to Matt Benjamin
#6 Updated by Matt Benjamin almost 2 years ago
Hi Folks,
I was able to build and run a ceph master librgw with nfs-ganesha next, as of Thursday afternoon, without apparent problems, using a ganesha.conf based on the one provided.
A couple of notes:
1. I did create a "sage" user using radosgw-admin and used it as the mounting user, as implied
2. the ganesha.conf is (exactly) as below--so the only differences from a cephadm-generated one would be in the rados-url stubs and substitution, so possibly the RGW {} config section could differ slightly, but that seems unlikely to make any kind of difference
have a look:
###################################################
#
# EXPORT
#
# To function, all that is required is an EXPORT
#
# Define the absolute minimal export
#
###################################################
EXPORT {
FSAL {
name = "RGW";
user_id = "sage";
access_key_id = "5MRIMLJVB1ODIEREBD08";
secret_access_key = "TlfACgAqHZclGlAZbF3aoRof8ibkDqsiLYh0ADV3";
}
export_id = 2;
path = "";
#path = "/"; # I thought path might need to be /, but "" worked fine
pseudo = "/sage";
access_type = "RW";
squash = "none";
attr_expiration_time = 0;
security_label = true;
protocols = 4;
transports = "TCP";
}
RGW {
ceph_conf = "/home/mbenjamin/ceph-cp/build/ceph.conf";
#init_args = "--debug-rgw=16 --log-file=/tmp/nfs.log";
init_args = "--debug-rgw=20";
}
NFS_Core_Param {
#Nb_Worker = 3;
#RPC_Debug_Flags = 2147483648;
mount_path_pseudo = true;
}
Cache_Inode {
#Dir_Max = 10000;
#Dir_Max = 200000;
#Cache_Size = 1000000;
#Entries_Hwmark = 1000000;
#Dir_Chunk = 0;
#Cache_Size = 191;
#Entries_Hwmark = 191;
}
NFSV4 {
Graceless = true;
Allow_Numeric_Owners = true;
Only_Numeric_Owners = true;
}
LOG {
# Default_Log_Level is unknown token??
#Default_Log_Level = NIV_FULL_DEBUG;
Components {
#ALL = FULL_DEBUG;
MEMLEAKS = FATAL;
FSAL = FATAL;
NFSPROTO = FULL_DEBUG;
NFS_V4 = FULL_DEBUG;
EXPORT = FATAL;
FILEHANDLE = FATAL;
DISPATCH = FATAL;
CACHE_INODE = FATAL;
CACHE_INODE_LRU = FATAL;
HASHTABLE = FATAL;
HASHTABLE_CACHE = FATAL;
DUPREQ = FATAL;
INIT = FATAL;
MAIN = FATAL;
IDMAPPER = FATAL;
NFS_READDIR = FULL_DEBUG;
NFS_V4_LOCK = FATAL;
CONFIG = FATAL;
CLIENTID = FATAL;
SESSIONS = FATAL;
PNFS = FATAL;
RW_LOCK = FATAL;
NLM = FATAL;
RPC = FATAL;
NFS_CB = FATAL;
THREAD = FATAL;
NFS_V4_ACL = FATAL;
STATE = FATAL;
# 9P = FATAL;
# 9P_DISPATCH = FATAL;
FSAL_UP = FATAL;
DBUS = FATAL;
}
Facility {
name = FILE;
destination = "/tmp/ganesha-rgw.log";
enable = active;
}
}
#7 Updated by Sage Weil almost 2 years ago
- Status changed from New to Closed
It looks like `path = ""` was the problem; `path = "/"` works.
#8 Updated by Matt Benjamin almost 2 years ago
- Status changed from Closed to Fix Under Review
- Pull request ID set to 43773
#9 Updated by Casey Bodley almost 2 years ago
- Status changed from Fix Under Review to Pending Backport
- Tags set to nfs
#10 Updated by Backport Bot almost 2 years ago
- Copied to Backport #53256: pacific: rgw nfs export at user-level crash on readdir added
#11 Updated by Loïc Dachary over 1 year ago
- Status changed from Pending Backport to Resolved
While running with --resolve-parent, the script "backport-create-issue" noticed that all backports of this issue are in status "Resolved" or "Rejected".