From 1e8968c5b0582392d5f132422f581e3ebc24e627 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 17 Dec 2013 18:20:16 +0100 Subject: NFS: dprintk() should not print negative fileids and inode numbers A fileid in NFS is a uint64. There are some occurrences where dprintk() outputs a signed fileid. This leads to confusion and more difficult to read debugging (negative fileids matching positive inode numbers). Signed-off-by: Niels de Vos CC: Santosh Pradhan Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00ad1c2b217d..5feec233895d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -458,9 +458,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + (unsigned long long)NFS_FILEID(inode), nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); @@ -870,8 +870,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); - dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", - inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n", + inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); trace_nfs_revalidate_inode_enter(inode); @@ -895,9 +895,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); if (status != 0) { - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); + (unsigned long long)NFS_FILEID(inode), status); if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) @@ -908,9 +908,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = nfs_refresh_inode(inode, fattr); if (status) { - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); + (unsigned long long)NFS_FILEID(inode), status); goto err_out; } @@ -919,9 +919,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) nfs_setsecurity(inode, fattr, label); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", + dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); + (unsigned long long)NFS_FILEID(inode)); err_out: nfs4_label_free(label); @@ -985,8 +985,9 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n", + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode)); return 0; } @@ -1434,7 +1435,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); @@ -1455,7 +1456,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ - printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", + printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); goto out_err; } -- cgit v1.2.3 From d8c951c313ed1d7144b55c0d56f7c53220044dda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Jan 2014 12:08:11 -0500 Subject: NFSv4.1: Don't trust attributes if a pNFS LAYOUTCOMMIT is outstanding If a LAYOUTCOMMIT is outstanding, then chances are that the metadata server may still be returning incorrect values for the change attribute, ctime, mtime and/or size. Just ignore those attributes for now, and wait for the LAYOUTCOMMIT rpc call to finish. Reported-by: shaobingqing Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5feec233895d..c63e15224466 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1283,12 +1283,28 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } +/* + * Don't trust the change_attribute, mtime, ctime or size if + * a pnfs LAYOUTCOMMIT is outstanding + */ +static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode, + struct nfs_fattr *fattr) +{ + if (pnfs_layoutcommit_outstanding(inode)) + fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE | + NFS_ATTR_FATTR_MTIME | + NFS_ATTR_FATTR_CTIME | + NFS_ATTR_FATTR_SIZE); +} + static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { int ret; trace_nfs_refresh_inode_enter(inode); + nfs_inode_attrs_handle_layoutcommit(inode, fattr); + if (nfs_inode_attrs_need_update(inode, fattr)) ret = nfs_update_inode(inode, fattr); else @@ -1518,8 +1534,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) || - new_isize > cur_isize) { + if ((nfsi->npages == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } -- cgit v1.2.3 From 013cdf1088d7235da9477a2375654921d9b9ba9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 20 Dec 2013 05:16:53 -0800 Subject: nfs: use generic posix ACL infrastructure for v3 Posix ACLs This causes a small behaviour change in that we don't bother to set ACLs on file creation if the mode bit can express the access permissions fully, and thus behaving identical to local filesystems. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfs/inode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00ad1c2b217d..ecd11ba7f960 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1641,10 +1641,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; -#ifdef CONFIG_NFS_V3_ACL - nfsi->acl_access = ERR_PTR(-EAGAIN); - nfsi->acl_default = ERR_PTR(-EAGAIN); -#endif #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From d529ef83c355f97027ff85298a9709fe06216a66 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 27 Jan 2014 13:46:15 -0500 Subject: NFS: fix the handling of NFS_INO_INVALID_DATA flag in nfs_revalidate_mapping There is a possible race in how the nfs_invalidate_mapping function is handled. Currently, we go and invalidate the pages in the file and then clear NFS_INO_INVALID_DATA. The problem is that it's possible for a stale page to creep into the mapping after the page was invalidated (i.e., via readahead). If another writer comes along and sets the flag after that happens but before invalidate_inode_pages2 returns then we could clear the flag without the cache having been properly invalidated. So, we must clear the flag first and then invalidate the pages. Doing this however, opens another race: It's possible to have two concurrent read() calls that end up in nfs_revalidate_mapping at the same time. The first one clears the NFS_INO_INVALID_DATA flag and then goes to call nfs_invalidate_mapping. Just before calling that though, the other task races in, checks the flag and finds it cleared. At that point, it trusts that the mapping is good and gets the lock on the page, allowing the read() to be satisfied from the cache even though the data is no longer valid. These effects are easily manifested by running diotest3 from the LTP test suite on NFS. That program does a series of DIO writes and buffered reads. The operations are serialized and page-aligned but the existing code fails the test since it occasionally allows a read to come out of the cache incorrectly. While mixing direct and buffered I/O isn't recommended, I believe it's possible to hit this in other ways that just use buffered I/O, though that situation is much harder to reproduce. The problem is that the checking/clearing of that flag and the invalidation of the mapping really need to be atomic. Fix this by serializing concurrent invalidations with a bitlock. At the same time, we also need to allow other places that check NFS_INO_INVALID_DATA to check whether we might be in the middle of invalidating the file, so fix up a couple of places that do that to look for the new NFS_INO_INVALIDATING flag. Doing this requires us to be careful not to set the bitlock unnecessarily, so this code only does that if it believes it will be doing an invalidation. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c63e15224466..0a972ee9ccc1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -977,11 +977,11 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (ret < 0) return ret; } - spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { + spin_lock(&inode->i_lock); memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); + } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); @@ -1008,6 +1008,7 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode) int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long *bitlock = &nfsi->flags; int ret = 0; /* swapfiles are not supposed to be shared. */ @@ -1019,12 +1020,45 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) if (ret < 0) goto out; } + + /* + * We must clear NFS_INO_INVALID_DATA first to ensure that + * invalidations that come in while we're shooting down the mappings + * are respected. But, that leaves a race window where one revalidator + * can clear the flag, and then another checks it before the mapping + * gets invalidated. Fix that by serializing access to this part of + * the function. + * + * At the same time, we need to allow other tasks to see whether we + * might be in the middle of invalidating the pages, so we only set + * the bit lock here if it looks like we're going to be doing that. + */ + for (;;) { + ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + goto out; + if (!(nfsi->cache_validity & NFS_INO_INVALID_DATA)) + goto out; + if (!test_and_set_bit_lock(NFS_INO_INVALIDATING, bitlock)) + break; + } + + spin_lock(&inode->i_lock); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); + } else { + /* something raced in and cleared the flag */ + spin_unlock(&inode->i_lock); } + clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_INVALIDATING); out: return ret; } -- cgit v1.2.3 From 17dfeb9113397a6119091a491ef7182649f0c5a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Jan 2014 09:37:16 -0500 Subject: NFS: Fix races in nfs_revalidate_mapping Commit d529ef83c355f97027ff85298a9709fe06216a66 (NFS: fix the handling of NFS_INO_INVALID_DATA flag in nfs_revalidate_mapping) introduces a potential race, since it doesn't test the value of nfsi->cache_validity and set the bitlock in nfsi->flags atomically. Signed-off-by: Trond Myklebust Cc: Jeff Layton --- fs/nfs/inode.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0a972ee9ccc1..e5070aa5f175 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1038,24 +1038,24 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfs_wait_bit_killable, TASK_KILLABLE); if (ret) goto out; - if (!(nfsi->cache_validity & NFS_INO_INVALID_DATA)) - goto out; - if (!test_and_set_bit_lock(NFS_INO_INVALIDATING, bitlock)) + spin_lock(&inode->i_lock); + if (test_bit(NFS_INO_INVALIDATING, bitlock)) { + spin_unlock(&inode->i_lock); + continue; + } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) break; - } - - spin_lock(&inode->i_lock); - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); - trace_nfs_invalidate_mapping_enter(inode); - ret = nfs_invalidate_mapping(inode, mapping); - trace_nfs_invalidate_mapping_exit(inode, ret); - } else { - /* something raced in and cleared the flag */ spin_unlock(&inode->i_lock); + goto out; } + set_bit(NFS_INO_INVALIDATING, bitlock); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + trace_nfs_invalidate_mapping_enter(inode); + ret = nfs_invalidate_mapping(inode, mapping); + trace_nfs_invalidate_mapping_exit(inode, ret); + clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); smp_mb__after_clear_bit(); wake_up_bit(bitlock, NFS_INO_INVALIDATING); -- cgit v1.2.3 From 4db72b40fdbc706f8957e9773ae73b1574b8c694 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Jan 2014 13:47:46 -0500 Subject: nfs: add memory barriers around NFS_INO_INVALID_DATA and NFS_INO_INVALIDATING If the setting of NFS_INO_INVALIDATING gets reordered to before the clearing of NFS_INO_INVALID_DATA, then another task may hit a race window where both appear to be clear, even though the inode's pages are still in need of invalidation. Fix this by adding the appropriate memory barriers. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e5070aa5f175..02e185168602 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1050,6 +1050,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) } set_bit(NFS_INO_INVALIDATING, bitlock); + smp_wmb(); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); -- cgit v1.2.3