Bug #21413
Closed. The set-lock implementation of ceph_lock_op2 in nfs-ganesha has a bug.
0%
Description
node1: When client1 sets a byte-range lock on 1.txt in the ceph cluster through the nfs-ganesha1 server,
node2: client2 can then still successfully set a lock on the same range of 1.txt through the nfs-ganesha2 server.
Looking at the implementation of ceph_lock_op2, its error handling differs from glusterfs_lock_op2:
ceph_lock_op2
/* Excerpt from FSAL_CEPH (nfs-ganesha), quoted by the reporter.
 *
 * BUG demonstrated here: when ceph_ll_setlk() fails with a conflict
 * (retval == -11, i.e. -EAGAIN) and a conflicting_lock buffer was supplied,
 * the follow-up ceph_ll_getlk() call below overwrites retval. If that getlk
 * SUCCEEDS, retval becomes 0, and the function falls through to err: and
 * returns ceph2fsal_error(0) — i.e. it reports SUCCESS for a lock request
 * that was actually denied. That is why a second client on another ganesha
 * server appears to "set" an already-held lock. */
fsal_status_t ceph_lock_op2(struct fsal_obj_handle *obj_hdl,
struct state_t *state,
void *owner,
fsal_lock_op_t lock_op,
fsal_lock_param_t *request_lock,
fsal_lock_param_t *conflicting_lock)
{
struct handle *myself = container_of(obj_hdl, struct handle, handle);
struct flock lock_args;
...
if (lock_op == FSAL_OP_LOCKT) {
/* LOCKT: test only — query for a conflicting lock. */
retval = ceph_ll_getlk(export->cmount, my_fd, &lock_args,
(uint64_t) owner);
} else {
retval = ceph_ll_setlk(export->cmount, my_fd, &lock_args,
(uint64_t) owner, false); /* reporter note: retval == -11 (-EAGAIN) here when the lock conflicts */
}
if (retval < 0) {
LogDebug(COMPONENT_FSAL,
"%s returned %d %s",
lock_op == FSAL_OP_LOCKT
? "ceph_ll_getlk" : "ceph_ll_setlk",
-retval, strerror(-retval));
if (conflicting_lock != NULL) {
/* Get the conflicting lock */
retval = ceph_ll_getlk(export->cmount, my_fd,
&lock_args, (uint64_t) owner); /* BUG: on success this resets retval to 0, clobbering the original setlk error */
if (retval < 0) {
LogCrit(COMPONENT_FSAL,
"After failing a lock request, I couldn't even get the details of who owns the lock, error %d %s",
-retval, strerror(-retval));
goto err; /* reviewer note: here getlk's error is returned and the original setlk error is lost */
}
if (conflicting_lock != NULL) {
conflicting_lock->lock_length = lock_args.l_len;
conflicting_lock->lock_start = lock_args.l_start;
conflicting_lock->lock_type = lock_args.l_type;
}
}
goto err; /* BUG: reaches err with retval == 0 after a successful getlk, so the failed SETLK is reported as success */
}
....
err:
if (closefd)
(void) ceph_ll_close(myself->export->cmount, my_fd);
if (has_lock)
PTHREAD_RWLOCK_unlock(&obj_hdl->obj_lock);
/* Converts retval to an FSAL status — returns success when retval == 0. */
return ceph2fsal_error(retval);
}
glusterfs_lock_op2
/* Excerpt from FSAL_GLUSTER (nfs-ganesha), quoted by the reporter as the
 * CORRECT pattern: the follow-up F_GETLK result goes into a separate local
 * `rc`, so `retval` keeps holding the original lock failure and the function
 * still returns an error status to the caller. */
static fsal_status_t glusterfs_lock_op2(struct fsal_obj_handle *obj_hdl,
struct state_t *state,
void *p_owner,
fsal_lock_op_t lock_op,
fsal_lock_param_t *request_lock,
fsal_lock_param_t *conflicting_lock)
{
struct flock lock_args;
...
retval = glfs_posix_lock(my_fd.glfd, fcntl_comm, &lock_args);
if (retval /* && lock_op == FSAL_OP_LOCK */) {
retval = errno;
int rc = 0; /* gluster uses a separate rc for the getlk below, so retval (the original error) is preserved */
LogDebug(COMPONENT_FSAL,
"fcntl returned %d %s",
retval, strerror(retval));
if (conflicting_lock != NULL) {
/* Get the conflicting lock */
rc = glfs_posix_lock(my_fd.glfd, F_GETLK,
&lock_args);
if (rc) { /* only when the getlk itself fails is retval replaced with the new errno */
retval = errno; /* we lose the initial error */
LogCrit(COMPONENT_FSAL,
"After failing a lock request, I couldn't even get the details of who owns the lock.");
goto err;
}
conflicting_lock->lock_length = lock_args.l_len;
conflicting_lock->lock_start = lock_args.l_start;
conflicting_lock->lock_type = lock_args.l_type;
}
/* retval still holds the original lock error here, so the caller sees the failure. */
goto err;
}
...
err:
SET_GLUSTER_CREDS(glfs_export, NULL, NULL, 0, NULL);
if (closefd)
glusterfs_close_my_fd(&my_fd);
if (has_lock)
PTHREAD_RWLOCK_unlock(&obj_hdl->obj_lock);
return fsalstat(posix2fsal_error(retval), retval);
}
Updated by yang jiang over 6 years ago
Updated by Jeff Layton over 6 years ago
As a quick test, I altered the ConcurrentRecordLocking and ThreesomeRecordLocking cephfs testcases to create different clients for each thread. That still passes, so it seems likely that we're able to set locks properly in cephfs.
To be clear, you have two ganesha servers here, and both are pointed to the same ceph cluster, right? What versions of ceph and ganesha are you running? Are the clients Linux? If so, what kernel version, please?
Updated by Jeff Layton over 6 years ago
- Status changed from New to Rejected
This turns out to be a bug in nfs-ganesha's FSAL_CEPH code. Closing this issue and will pursue it in the github issue.