From 6c6c1d63c243025956f061e67fff3a615aa0f6be Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: [PATCH] afs: Provide mount-time configurable byte-range file locking
 emulation

Provide byte-range file locking emulation that can be configured at mount
time to one of four modes:

 (1) flock=local.  Locking is done locally only and no reference is made to
     the server.

 (2) flock=openafs.  Byte-range locking is done locally only; whole-file
     locking is done with reference to the server.  Whole-file locks cannot
     be upgraded unless the client holds an exclusive lock.

 (3) flock=strict.  Byte-range and whole-file locking both require a
     sufficient whole-file lock on the server.

 (4) flock=write.  As strict, but the client always gets an exclusive
     whole-file lock on the server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             | 49 ++++++++++++++++++++++++++++++++------
 fs/afs/fsclient.c          |  2 +-
 fs/afs/internal.h          | 14 +++++++++++
 fs/afs/super.c             | 27 +++++++++++++++++++++
 fs/afs/yfsclient.c         |  2 +-
 include/trace/events/afs.h | 35 +++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 9 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 325bf731d8dd7..ef313f4c1d114 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -409,7 +409,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode)
  * whether we think that we have a locking permit.
  */
 static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
-			      afs_lock_type_t type, bool can_sleep)
+			      enum afs_flock_mode mode, afs_lock_type_t type)
 {
 	afs_access_t access;
 	int ret;
@@ -437,13 +437,9 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
 	if (type == AFS_LOCK_READ) {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
 			return -EACCES;
-		if (vnode->status.lock_count == -1 && !can_sleep)
-			return -EAGAIN; /* Write locked */
 	} else {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
 			return -EACCES;
-		if (vnode->status.lock_count != 0 && !can_sleep)
-			return -EAGAIN; /* Locked */
 	}
 
 	return 0;
@@ -456,24 +452,48 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
 	struct inode *inode = locks_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
 	afs_lock_type_t type;
 	struct key *key = afs_file_key(file);
+	bool partial, no_server_lock = false;
 	int ret;
 
-	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	if (mode == afs_flock_mode_unset)
+		mode = afs_flock_mode_openafs;
+
+	_enter("{%llx:%llu},%llu-%llu,%u,%u",
+	       vnode->fid.vid, vnode->fid.vnode,
+	       fl->fl_start, fl->fl_end, fl->fl_type, mode);
 
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
 
+	partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+	if (mode == afs_flock_mode_write && partial)
+		type = AFS_LOCK_WRITE;
 
-	ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
+	ret = afs_do_setlk_check(vnode, key, mode, type);
 	if (ret < 0)
 		return ret;
 
 	trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
 
+	/* AFS3 protocol only supports full-file locks and doesn't provide any
+	 * method of upgrade/downgrade, so we need to emulate for partial-file
+	 * locks.
+	 *
+	 * The OpenAFS client only gets a server lock for a full-file lock and
+	 * keeps partial-file locks local.  Allow this behaviour to be emulated
+	 * (as the default).
+	 */
+	if (mode == afs_flock_mode_local ||
+	    (partial && mode == afs_flock_mode_openafs)) {
+		no_server_lock = true;
+		goto skip_server_lock;
+	}
+
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
@@ -502,6 +522,18 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		}
 	}
 
+	if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
+	    !(fl->fl_flags & FL_SLEEP)) {
+		ret = -EAGAIN;
+		if (type == AFS_LOCK_READ) {
+			if (vnode->status.lock_count == -1)
+				goto lock_is_contended; /* Write locked */
+		} else {
+			if (vnode->status.lock_count != 0)
+				goto lock_is_contended; /* Locked */
+		}
+	}
+
 	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 		goto need_to_wait;
 
@@ -571,6 +603,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	/* the lock has been granted by the server... */
 	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
 
+skip_server_lock:
 	/* ... but the VFS still needs to distribute access on this client. */
 	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
 	ret = locks_lock_file_wait(file, fl);
@@ -649,6 +682,8 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	 * deal with.
 	 */
 	_debug("vfs refused %d", ret);
+	if (no_server_lock)
+		goto error;
 	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
 	afs_defer_unlock(vnode);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index be4520eb49657..9b73a57aa5cbb 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1902,7 +1902,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	*bp++ = htonl(type);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 20fd44de26acd..91204e1428f26 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,11 +36,24 @@
 struct pagevec;
 struct afs_call;
 
+/*
+ * Partial file-locking emulation mode.  (The problem being that AFS3 only
+ * allows whole-file locks and no upgrading/downgrading).
+ */
+enum afs_flock_mode {
+	afs_flock_mode_unset,
+	afs_flock_mode_local,	/* Local locking only */
+	afs_flock_mode_openafs,	/* Don't get server lock for a partial lock */
+	afs_flock_mode_strict,	/* Always get a server lock for a partial lock */
+	afs_flock_mode_write,	/* Get an exclusive server lock for a partial lock */
+};
+
 struct afs_fs_context {
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
 	bool			dyn_root;	/* T if dynamic root */
 	bool			no_cell;	/* T if the source is "none" (for dynroot) */
+	enum afs_flock_mode	flock_mode;	/* Partial file-locking emulation mode */
 	afs_voltype_t		type;		/* type of volume requested */
 	unsigned int		volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
@@ -221,6 +234,7 @@ struct afs_super_info {
 	struct net		*net_ns;	/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
+	enum afs_flock_mode	flock_mode:8;	/* File locking emulation mode */
 	bool			dyn_root;	/* True if dynamic root */
 };
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ce85ae61f12d9..18334fa1a0d28 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -67,19 +67,30 @@ static atomic_t afs_count_active_inodes;
 enum afs_param {
 	Opt_autocell,
 	Opt_dyn,
+	Opt_flock,
 	Opt_source,
 };
 
 static const struct fs_parameter_spec afs_param_specs[] = {
 	fsparam_flag  ("autocell",	Opt_autocell),
 	fsparam_flag  ("dyn",		Opt_dyn),
+	fsparam_enum  ("flock",		Opt_flock),
 	fsparam_string("source",	Opt_source),
 	{}
 };
 
+static const struct fs_parameter_enum afs_param_enums[] = {
+	{ Opt_flock,	"local",	afs_flock_mode_local },
+	{ Opt_flock,	"openafs",	afs_flock_mode_openafs },
+	{ Opt_flock,	"strict",	afs_flock_mode_strict },
+	{ Opt_flock,	"write",	afs_flock_mode_write },
+	{}
+};
+
 static const struct fs_parameter_description afs_fs_parameters = {
 	.name		= "kAFS",
 	.specs		= afs_param_specs,
+	.enums		= afs_param_enums,
 };
 
 /*
@@ -182,11 +193,22 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 static int afs_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	const char *p = NULL;
 
 	if (as->dyn_root)
 		seq_puts(m, ",dyn");
 	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
 		seq_puts(m, ",autocell");
+	switch (as->flock_mode) {
+	case afs_flock_mode_unset:	break;
+	case afs_flock_mode_local:	p = "local";	break;
+	case afs_flock_mode_openafs:	p = "openafs";	break;
+	case afs_flock_mode_strict:	p = "strict";	break;
+	case afs_flock_mode_write:	p = "write";	break;
+	}
+	if (p)
+		seq_printf(m, ",flock=%s", p);
+
 	return 0;
 }
 
@@ -315,6 +337,10 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		ctx->dyn_root = true;
 		break;
 
+	case Opt_flock:
+		ctx->flock_mode = result.uint_32;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -466,6 +492,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (as) {
 		as->net_ns = get_net(fc->net_ns);
+		as->flock_mode = ctx->flock_mode;
 		if (ctx->dyn_root) {
 			as->dyn_root = true;
 		} else {
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 5ea0350dc9dd6..055840aa07f63 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1860,7 +1860,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e81d6a50781f9..f1373c29bf7d5 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -539,6 +539,41 @@ TRACE_EVENT(afs_make_fs_call,
 		      __print_symbolic(__entry->op, afs_fs_operations))
 	    );
 
+TRACE_EVENT(afs_make_fs_calli,
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     unsigned int i),
+
+	    TP_ARGS(call, fid, i),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(unsigned int,		i		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call->debug_id;
+		    __entry->i = i;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->i)
+	    );
+
 TRACE_EVENT(afs_make_fs_call1,
 	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
 		     const char *name),
-- 
GitLab