From 1e8968c5b0582392d5f132422f581e3ebc24e627 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 17 Dec 2013 18:20:16 +0100 Subject: NFS: dprintk() should not print negative fileids and inode numbers A fileid in NFS is a uint64. There are some occurrences where dprintk() outputs a signed fileid. This leads to confusion and more difficult to read debugging (negative fileids matching positive inode numbers). Signed-off-by: Niels de Vos CC: Santosh Pradhan Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d71d66c9e0a1..049b3408fb9d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -237,9 +237,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) static void nfs_direct_readpage_release(struct nfs_page *req) { - dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", + dprintk("NFS: direct read done (%s/%llu %d@%lld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_release_request(req); -- cgit v1.2.3 From 9811cd57f4c6b5b60ec104de68a88303717e3106 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:28 -0800 Subject: nfs: fix size updates for aio writes nfs_file_direct_write only updates the inode size if it succeeded and returned the number of bytes written. But in the AIO case nfs_direct_wait turns the return value into -EIOCBQUEUED and we skip the size update. Instead the aio completion path should updated it, which this patch does. The implementation is a little hacky because there is no obvious way to find out we are called for a write in nfs_direct_complete. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 049b3408fb9d..ce52a9774ec1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -222,12 +222,23 @@ out: * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust * the iocb is still valid here if this is a synchronous request. */ -static void nfs_direct_complete(struct nfs_direct_req *dreq) +static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) { + struct inode *inode = dreq->inode; + if (dreq->iocb) { + loff_t pos = dreq->iocb->ki_pos + dreq->count; long res = (long) dreq->error; if (!res) res = (long) dreq->count; + + if (write) { + spin_lock(&inode->i_lock); + if (i_size_read(inode) < pos) + i_size_write(inode, pos); + spin_unlock(&inode->i_lock); + } + aio_complete(dreq->iocb, res, 0); } complete_all(&dreq->completion); @@ -272,7 +283,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) } out_put: if (put_dreq(dreq)) - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, false); hdr->release(hdr); } @@ -434,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } if (put_dreq(dreq)) - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, false); return 0; } @@ -594,7 +605,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) break; default: nfs_inode_dio_write_done(dreq->inode); - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, true); } } @@ -611,7 +622,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { nfs_inode_dio_write_done(inode); - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, true); } #endif -- cgit v1.2.3 From 2a009ec98cce440c0992fc9a2353e96cdb0b048b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:29 -0800 Subject: nfs: defer inode_dio_done call until size update is done We need to have the I/O fully finished before telling the truncate code that we are done. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ce52a9774ec1..75ed2a90b0f2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -226,21 +226,27 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) { struct inode *inode = dreq->inode; - if (dreq->iocb) { + if (dreq->iocb && write) { loff_t pos = dreq->iocb->ki_pos + dreq->count; + + spin_lock(&inode->i_lock); + if (i_size_read(inode) < pos) + i_size_write(inode, pos); + spin_unlock(&inode->i_lock); + } + + if (write) { + nfs_zap_mapping(inode, inode->i_mapping); + inode_dio_done(inode); + } + + if (dreq->iocb) { long res = (long) dreq->error; if (!res) res = (long) dreq->count; - - if (write) { - spin_lock(&inode->i_lock); - if (i_size_read(inode) < pos) - i_size_write(inode, pos); - spin_unlock(&inode->i_lock); - } - aio_complete(dreq->iocb, res, 0); } + complete_all(&dreq->completion); nfs_direct_req_release(dreq); @@ -483,12 +489,6 @@ out: return result; } -static void nfs_inode_dio_write_done(struct inode *inode) -{ - nfs_zap_mapping(inode, inode->i_mapping); - inode_dio_done(inode); -} - #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { @@ -604,7 +604,6 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) nfs_direct_write_reschedule(dreq); break; default: - nfs_inode_dio_write_done(dreq->inode); nfs_direct_complete(dreq, true); } } @@ -621,7 +620,6 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_inode_dio_write_done(inode); nfs_direct_complete(dreq, true); } #endif -- cgit v1.2.3 From 1f90ee27461e31a1c18e5d819f6ea6f5c7304b16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:30 -0800 Subject: nfs: increment i_dio_count for reads, too i_dio_count is used to protect dio access against truncate. We want to make sure there are no dio reads pending either when doing a truncate. I suspect on plain NFS things might work even without this, but once we use a pnfs layout driver that access backing devices directly things will go bad without the proper synchronization. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 75ed2a90b0f2..6c232107e835 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -235,10 +235,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) spin_unlock(&inode->i_lock); } - if (write) { + if (write) nfs_zap_mapping(inode, inode->i_mapping); - inode_dio_done(inode); - } + + inode_dio_done(inode); if (dreq->iocb) { long res = (long) dreq->error; @@ -419,6 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; ssize_t result = -EINVAL; size_t requested_bytes = 0; unsigned long seg; @@ -427,6 +428,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; + atomic_inc(&inode->i_dio_count); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; @@ -446,6 +448,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { + inode_dio_done(inode); nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; } -- cgit v1.2.3 From 14a3ec79437252d922bae574ecbf0c0584c330f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:31 -0800 Subject: nfs: merge nfs_direct_read into nfs_file_direct_read Simple code cleanup to prepare for later fixes. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 107 ++++++++++++++++++++++++++------------------------------ 1 file changed, 49 insertions(+), 58 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6c232107e835..f8b0a81c340e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -458,14 +458,55 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, return 0; } -static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @iov: vector of user buffers into which to read data + * @nr_segs: size of iov vector + * @pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file. For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change. Our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. + */ +ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos, bool uio) { - ssize_t result = -ENOMEM; - struct inode *inode = iocb->ki_filp->f_mapping->host; + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; + ssize_t result = -EINVAL; + size_t count; + + count = iov_length(iov, nr_segs); + nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); + + dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); + + result = 0; + if (!count) + goto out; + + result = nfs_sync_mapping(mapping); + if (result) + goto out; + + task_io_account_read(count); + result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (dreq == NULL) goto out; @@ -484,8 +525,11 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, NFS_I(inode)->read_io += iov_length(iov, nr_segs); result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); - if (!result) + if (!result) { result = nfs_direct_wait(dreq); + if (result > 0) + iocb->ki_pos = pos + result; + } out_release: nfs_direct_req_release(dreq); out: @@ -888,59 +932,6 @@ out: return result; } -/** - * nfs_file_direct_read - file direct read operation for NFS files - * @iocb: target I/O control block - * @iov: vector of user buffers into which to read data - * @nr_segs: size of iov vector - * @pos: byte offset in file where reading starts - * - * We use this function for direct reads instead of calling - * generic_file_aio_read() in order to avoid gfar's check to see if - * the request starts before the end of the file. For that check - * to work, we must generate a GETATTR before each direct read, and - * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. Our preference is simply - * to do all reads the application wants, and the server will take - * care of managing the end of file boundary. - * - * This function also eliminates unnecessarily updating the file's - * atime locally, as the NFS server sets the file's atime, and this - * client must read the updated atime from the server back into its - * cache. - */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) -{ - ssize_t retval = -EINVAL; - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - size_t count; - - count = iov_length(iov, nr_segs); - nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - - dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); - - retval = 0; - if (!count) - goto out; - - retval = nfs_sync_mapping(mapping); - if (retval) - goto out; - - task_io_account_read(count); - - retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); - if (retval > 0) - iocb->ki_pos = pos + retval; - -out: - return retval; -} - /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block -- cgit v1.2.3 From 22cd1bf1480133518b3e5568466759ffde649b35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:32 -0800 Subject: nfs: merge nfs_direct_write into nfs_file_direct_write Simple code cleanup to prepare for later fixes. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 91 ++++++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 50 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f8b0a81c340e..cbfbd17eae85 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -898,40 +898,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, return 0; } -static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, - size_t count, bool uio) -{ - ssize_t result = -ENOMEM; - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct nfs_direct_req *dreq; - struct nfs_lock_context *l_ctx; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - goto out; - - dreq->inode = inode; - dreq->bytes_left = count; - dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - l_ctx = nfs_get_lock_context(dreq->ctx); - if (IS_ERR(l_ctx)) { - result = PTR_ERR(l_ctx); - goto out_release; - } - dreq->l_ctx = l_ctx; - if (!is_sync_kiocb(iocb)) - dreq->iocb = iocb; - - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); - if (!result) - result = nfs_direct_wait(dreq); -out_release: - nfs_direct_req_release(dreq); -out: - return result; -} - /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block @@ -957,9 +923,12 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, bool uio) { - ssize_t retval = -EINVAL; + ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; size_t count; count = iov_length(iov, nr_segs); @@ -968,35 +937,57 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, count, (long long) pos); - retval = generic_write_checks(file, &pos, &count, 0); - if (retval) + result = generic_write_checks(file, &pos, &count, 0); + if (result) goto out; - retval = -EINVAL; + result = -EINVAL; if ((ssize_t) count < 0) goto out; - retval = 0; + result = 0; if (!count) goto out; - retval = nfs_sync_mapping(mapping); - if (retval) + result = nfs_sync_mapping(mapping); + if (result) goto out; task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); - if (retval > 0) { - struct inode *inode = mapping->host; + result = -ENOMEM; + dreq = nfs_direct_req_alloc(); + if (!dreq) + goto out; - iocb->ki_pos = pos + retval; - spin_lock(&inode->i_lock); - if (i_size_read(inode) < iocb->ki_pos) - i_size_write(inode, iocb->ki_pos); - spin_unlock(&inode->i_lock); + dreq->inode = inode; + dreq->bytes_left = count; + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); + goto out_release; + } + dreq->l_ctx = l_ctx; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); + if (!result) { + result = nfs_direct_wait(dreq); + if (result > 0) { + struct inode *inode = mapping->host; + + iocb->ki_pos = pos + result; + spin_lock(&inode->i_lock); + if (i_size_read(inode) < iocb->ki_pos) + i_size_write(inode, iocb->ki_pos); + spin_unlock(&inode->i_lock); + } } +out_release: + nfs_direct_req_release(dreq); out: - return retval; + return result; } /** -- cgit v1.2.3 From d0b9875d65c1abcc9d405d648660dfb919353959 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:33 -0800 Subject: nfs: take i_mutex during direct I/O reads We'll need the i_mutex to prevent i_dio_count from incrementing while truncate is waiting for it to reach zero, and protects against having the pagecache repopulated after we flushed it. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index cbfbd17eae85..85e4e4be401a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -500,16 +500,17 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; + mutex_lock(&inode->i_mutex); result = nfs_sync_mapping(mapping); if (result) - goto out; + goto out_unlock; task_io_account_read(count); result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (dreq == NULL) - goto out; + goto out_unlock; dreq->inode = inode; dreq->bytes_left = iov_length(iov, nr_segs); @@ -525,13 +526,22 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, NFS_I(inode)->read_io += iov_length(iov, nr_segs); result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); + + mutex_unlock(&inode->i_mutex); + if (!result) { result = nfs_direct_wait(dreq); if (result > 0) iocb->ki_pos = pos + result; } + + nfs_direct_req_release(dreq); + return result; + out_release: nfs_direct_req_release(dreq); +out_unlock: + mutex_unlock(&inode->i_mutex); out: return result; } -- cgit v1.2.3 From a9ab5e840669b19aca2974e2c771a77df2876434 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 08:50:34 -0800 Subject: nfs: page cache invalidation for dio Make sure to properly invalidate the pagecache before performing direct I/O, so that no stale pages are left around. This matches what the generic direct I/O code does. Also take the i_mutex over the direct write submission to avoid the lifelock vs truncate waiting for i_dio_count to decrease, and to avoid having the pagecache easily repopulated while direct I/O is in progrss. Again matching the generic direct I/O code. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 85e4e4be401a..b8797ae6831f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -939,9 +939,12 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; + loff_t end; size_t count; count = iov_length(iov, nr_segs); + end = (pos + count - 1) >> PAGE_CACHE_SHIFT; + nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", @@ -958,16 +961,25 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; + mutex_lock(&inode->i_mutex); + result = nfs_sync_mapping(mapping); if (result) - goto out; + goto out_unlock; + + if (mapping->nrpages) { + result = invalidate_inode_pages2_range(mapping, + pos >> PAGE_CACHE_SHIFT, end); + if (result) + goto out_unlock; + } task_io_account_write(count); result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (!dreq) - goto out; + goto out_unlock; dreq->inode = inode; dreq->bytes_left = count; @@ -982,6 +994,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, dreq->iocb = iocb; result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); + + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_CACHE_SHIFT, end); + } + + mutex_unlock(&inode->i_mutex); + if (!result) { result = nfs_direct_wait(dreq); if (result > 0) { @@ -994,8 +1014,13 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, spin_unlock(&inode->i_lock); } } + nfs_direct_req_release(dreq); + return result; + out_release: nfs_direct_req_release(dreq); +out_unlock: + mutex_unlock(&inode->i_mutex); out: return result; } -- cgit v1.2.3