From 8003d3c4aaa5560400818e14ce5db49cdfd79865 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 May 2014 06:28:45 -0400 Subject: nfs4: treat lock owners as opaque values Do the following set of ops with a file on a NFSv4 mount: exec 3>>/file/on/nfsv4 flock -x 3 exec 3>&- You'll see the LOCK request go across the wire, but no LOCKU when the file is closed. What happens is that the fd is passed across a fork, and the final close is done in a different process than the opener. That makes __nfs4_find_lock_state miss finding the correct lock state because it uses the fl_pid as a search key. A new one is created, and the locking code treats it as a delegation stateid (because NFS_LOCK_INITIALIZED isn't set). The root cause of this breakage seems to be commit 77041ed9b49a9e (NFSv4: Ensure the lockowners are labelled using the fl_owner and/or fl_pid). That changed it so that flock lockowners are allocated based on the fl_pid. I think this is incorrect. flock locks should be "owned" by the struct file, and that is already accounted for in the fl_owner field of the lock request when it comes through nfs_flock. This patch basically reverts the above commit and with it, a LOCKU is sent in the above reproducer. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 45 +++++++++------------------------------------ 1 file changed, 9 insertions(+), 36 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 848f6853c59e..544040835482 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) + if (pos->ls_owner != fl_owner) continue; - switch (pos->ls_owner.lo_type) { - case NFS4_POSIX_LOCK_TYPE: - if (pos->ls_owner.lo_u.posix_owner != fl_owner) - continue; - break; - case NFS4_FLOCK_LOCK_TYPE: - if (pos->ls_owner.lo_u.flock_owner != fl_pid) - continue; - } atomic_inc(&pos->ls_count); return pos; } @@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p * exists, return an uninitialized one. * */ -static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; @@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; - lsp->ls_owner.lo_type = type; - switch (lsp->ls_owner.lo_type) { - case NFS4_FLOCK_LOCK_TYPE: - lsp->ls_owner.lo_u.flock_owner = fl_pid; - break; - case NFS4_POSIX_LOCK_TYPE: - lsp->ls_owner.lo_u.posix_owner = fl_owner; - break; - default: - goto out_free; - } + lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); if (lsp->ls_seqid.owner_id < 0) goto out_free; @@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp * exists, return an uninitialized one. * */ -static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { struct nfs4_lock_state *lsp, *new = NULL; for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner, pid, type); + lsp = __nfs4_find_lock_state(state, owner); if (lsp != NULL) break; if (new != NULL) { @@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ break; } spin_unlock(&state->state_lock); - new = nfs4_alloc_lock_state(state, owner, pid, type); + new = nfs4_alloc_lock_state(state, owner); if (new == NULL) return NULL; } @@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_ops != NULL) return 0; - if (fl->fl_flags & FL_POSIX) - lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); - else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, - NFS4_FLOCK_LOCK_TYPE); - else - return -EINVAL; + lsp = nfs4_get_lock_state(state, fl->fl_owner); if (lsp == NULL) return -ENOMEM; fl->fl_u.nfs4_fl.owner = lsp; @@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; - pid_t fl_pid; int ret = -ENOENT; @@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, goto out; fl_owner = lockowner->l_owner; - fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); + lsp = __nfs4_find_lock_state(state, fl_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { -- cgit v1.2.3 From 49a4bda22e186c4d0eb07f4a36b5b1a378f9398d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 May 2014 06:28:46 -0400 Subject: nfs4: queue free_lock_state job submission to nfsiod We got a report of the following warning in Fedora: BUG: sleeping function called from invalid context at mm/slub.c:969 in_atomic(): 1, irqs_disabled(): 0, pid: 533, name: bash 3 locks held by bash/533: #0: (&sp->so_delegreturn_mutex){+.+...}, at: [] nfs4_proc_lock+0x262/0x910 [nfsv4] #1: (&nfsi->rwsem){.+.+.+}, at: [] nfs4_proc_lock+0x26a/0x910 [nfsv4] #2: (&sb->s_type->i_lock_key#23){+.+...}, at: [] flock_lock_file_wait+0x8c/0x3a0 CPU: 0 PID: 533 Comm: bash Not tainted 3.15.0-0.rc1.git1.1.fc21.x86_64 #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000000 00000000d664ff3c ffff880078b69a70 ffffffff817e82e0 0000000000000000 ffff880078b69a98 ffffffff810cf1a4 0000000000000050 0000000000000050 ffff88007cc01a00 ffff880078b69ad8 ffffffff8121449e Call Trace: [] dump_stack+0x4d/0x66 [] __might_sleep+0x184/0x240 [] kmem_cache_alloc_trace+0x4e/0x330 [] ? nfs4_release_lockowner+0x74/0x110 [nfsv4] [] nfs4_release_lockowner+0x74/0x110 [nfsv4] [] nfs4_put_lock_state+0x90/0xb0 [nfsv4] [] nfs4_fl_release_lock+0x15/0x20 [nfsv4] [] locks_free_lock+0x45/0x90 [] flock_lock_file_wait+0x11c/0x3a0 [] ? nfs4_proc_lock+0x26a/0x910 [nfsv4] [] do_vfs_lock+0x1e/0x30 [nfsv4] [] nfs4_proc_lock+0x279/0x910 [nfsv4] [] ? local_clock+0x16/0x30 [] ? lock_release_holdtime.part.28+0xf/0x200 [] do_unlk+0x8c/0xc0 [nfs] [] nfs_flock+0xa5/0xf0 [nfs] [] locks_remove_file+0xb6/0x1e0 [] ? kfree+0xd8/0x2d0 [] __fput+0xd3/0x210 [] ____fput+0xe/0x10 [] task_work_run+0xcd/0xf0 [] do_notify_resume+0x61/0x90 [] int_signal+0x12/0x17 The problem is that NFSv4 is trying to do an allocation from fl_release_private (in order to send a RELEASE_LOCKOWNER call). That function can be called while holding the inode->i_lock, and it's currently set up to do __GFP_WAIT allocations. v4.1 code has a similar problem. This patch adds a work_struct to the nfs4_lock_state and has the code queue the free_lock_state operation to nfsiod. Reported-by: Josh Stone Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 544040835482..a770c8e469a7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -799,6 +799,18 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } +static void +free_lock_state_work(struct work_struct *work) +{ + struct nfs4_lock_state *lsp = container_of(work, + struct nfs4_lock_state, ls_release); + struct nfs4_state *state = lsp->ls_state; + struct nfs_server *server = state->owner->so_server; + struct nfs_client *clp = server->nfs_client; + + clp->cl_mvops->free_lock_state(server, lsp); +} + /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -820,6 +832,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); + INIT_WORK(&lsp->ls_release, free_lock_state_work); return lsp; out_free: kfree(lsp); @@ -883,13 +896,12 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - server = state->owner->so_server; - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct nfs_client *clp = server->nfs_client; - - clp->cl_mvops->free_lock_state(server, lsp); - } else + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) + queue_work(nfsiod_workqueue, &lsp->ls_release); + else { + server = state->owner->so_server; nfs4_free_lock_state(server, lsp); + } } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) -- cgit v1.2.3 From 743162013d40ca612b4cb53d3a200dff2d9ab26e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Jul 2014 15:16:04 +1000 Subject: sched: Remove proliferation of wait_on_bit() action functions The current "wait_on_bit" interface requires an 'action' function to be provided which does the actual waiting. There are over 20 such functions, many of them identical. Most cases can be satisfied by one of just two functions, one which uses io_schedule() and one which just uses schedule(). So: Rename wait_on_bit and wait_on_bit_lock to wait_on_bit_action and wait_on_bit_lock_action to make it explicit that they need an action function. Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io which are *not* given an action function but implicitly use a standard one. The decision to error-out if a signal is pending is now made based on the 'mode' argument rather than being encoded in the action function. All instances of the old wait_on_bit and wait_on_bit_lock which can use the new version have been changed accordingly and their action functions have been discarded. wait_on_bit{_lock} does not return any specific error code in the event of a signal so the caller must check for non-zero and interpolate their own error code as appropriate. The wait_on_bit() call in __fscache_wait_on_invalidate() was ambiguous as it specified TASK_UNINTERRUPTIBLE but used fscache_wait_bit_interruptible as an action function. David Howells confirms this should be uniformly "uninterruptible" The main remaining user of wait_on_bit{,_lock}_action is NFS which needs to use a freezer-aware schedule() call. A comment in fs/gfs2/glock.c notes that having multiple 'action' functions is useful as they display differently in the 'wchan' field of 'ps'. (and /proc/$PID/wchan). As the new bit_wait{,_io} functions are tagged "__sched", they will not show up at all, but something higher in the stack. So the distinction will still be visible, only with different function names (gds2_glock_wait versus gfs2_glock_dq_wait in the gfs2/glock.c case). Since first version of this patch (against 3.15) two new action functions appeared, on in NFS and one in CIFS. CIFS also now uses an action function that makes the same freezer aware schedule call as NFS. Signed-off-by: NeilBrown Acked-by: David Howells (fscache, keys) Acked-by: Steven Whitehouse (gfs2) Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Steve French Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown Signed-off-by: Ingo Molnar --- fs/nfs/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 848f6853c59e..42f121182167 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); atomic_inc(&clp->cl_count); - res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs_wait_bit_killable, TASK_KILLABLE); + res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); if (res) goto out; if (clp->cl_cons_state < 0) -- cgit v1.2.3 From 0c0e0d3c091ed5b823fb89e1d46dbb9abd5830a7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Sep 2014 08:26:01 -0400 Subject: nfs: revert "nfs4: queue free_lock_state job submission to nfsiod" This reverts commit 49a4bda22e186c4d0eb07f4a36b5b1a378f9398d. Christoph reported an oops due to the above commit: generic/089 242s ...[ 2187.041239] general protection fault: 0000 [#1] SMP [ 2187.042899] Modules linked in: [ 2187.044000] CPU: 0 PID: 11913 Comm: kworker/0:1 Not tainted 3.16.0-rc6+ #1151 [ 2187.044287] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 2187.044287] Workqueue: nfsiod free_lock_state_work [ 2187.044287] task: ffff880072b50cd0 ti: ffff88007a4ec000 task.ti: ffff88007a4ec000 [ 2187.044287] RIP: 0010:[] [] free_lock_state_work+0x16/0x30 [ 2187.044287] RSP: 0018:ffff88007a4efd58 EFLAGS: 00010296 [ 2187.044287] RAX: 6b6b6b6b6b6b6b6b RBX: ffff88007a947ac0 RCX: 8000000000000000 [ 2187.044287] RDX: ffffffff826af9e0 RSI: ffff88007b093c00 RDI: ffff88007b093db8 [ 2187.044287] RBP: ffff88007a4efd58 R08: ffffffff832d3e10 R09: 000001c40efc0000 [ 2187.044287] R10: 0000000000000000 R11: 0000000000059e30 R12: ffff88007fc13240 [ 2187.044287] R13: ffff88007fc18b00 R14: ffff88007b093db8 R15: 0000000000000000 [ 2187.044287] FS: 0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 [ 2187.044287] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 2187.044287] CR2: 00007f93ec33fb80 CR3: 0000000079dc2000 CR4: 00000000000006f0 [ 2187.044287] Stack: [ 2187.044287] ffff88007a4efdd8 ffffffff810cc877 ffffffff810cc80d ffff88007fc13258 [ 2187.044287] 000000007a947af0 0000000000000000 ffffffff8353ccc8 ffffffff82b6f3d0 [ 2187.044287] 0000000000000000 ffffffff82267679 ffff88007a4efdd8 ffff88007fc13240 [ 2187.044287] Call Trace: [ 2187.044287] [] process_one_work+0x1c7/0x490 [ 2187.044287] [] ? process_one_work+0x15d/0x490 [ 2187.044287] [] worker_thread+0x119/0x4f0 [ 2187.044287] [] ? trace_hardirqs_on+0xd/0x10 [ 2187.044287] [] ? init_pwq+0x190/0x190 [ 2187.044287] [] kthread+0xdf/0x100 [ 2187.044287] [] ? __init_kthread_worker+0x70/0x70 [ 2187.044287] [] ret_from_fork+0x7c/0xb0 [ 2187.044287] [] ? __init_kthread_worker+0x70/0x70 [ 2187.044287] Code: 0f 1f 44 00 00 31 c0 5d c3 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 8d b7 48 fe ff ff 48 8b 87 58 fe ff ff 48 89 e5 48 8b 40 30 <48> 8b 00 48 8b 10 48 89 c7 48 8b 92 90 03 00 00 ff 52 28 5d c3 [ 2187.044287] RIP [] free_lock_state_work+0x16/0x30 [ 2187.044287] RSP [ 2187.103626] ---[ end trace 0f11326d28e5d8fa ]--- The original reason for this patch was because the fl_release_private operation couldn't sleep. With commit ed9814d85810 (locks: defer freeing locks in locks_delete_lock until after i_lock has been dropped), this is no longer a problem so we can revert this patch. Reported-by: Christoph Hellwig Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Tested-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a043f618cd5a..22fe35104c0c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -799,18 +799,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -static void -free_lock_state_work(struct work_struct *work) -{ - struct nfs4_lock_state *lsp = container_of(work, - struct nfs4_lock_state, ls_release); - struct nfs4_state *state = lsp->ls_state; - struct nfs_server *server = state->owner->so_server; - struct nfs_client *clp = server->nfs_client; - - clp->cl_mvops->free_lock_state(server, lsp); -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -832,7 +820,6 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); - INIT_WORK(&lsp->ls_release, free_lock_state_work); return lsp; out_free: kfree(lsp); @@ -896,12 +883,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) - queue_work(nfsiod_workqueue, &lsp->ls_release); - else { - server = state->owner->so_server; + server = state->owner->so_server; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { + struct nfs_client *clp = server->nfs_client; + + clp->cl_mvops->free_lock_state(server, lsp); + } else nfs4_free_lock_state(server, lsp); - } } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) -- cgit v1.2.3