From 085d1e33a6a8495d9afa58ad2b8b7ea74d613515 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Thu, 11 Dec 2014 13:04:55 -0500 Subject: pnfs: Do not grab the commit_info lock twice when rescheduling writes Acked-by: Jeff Layton Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 10bf07280f4a..e84f764b9dcd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -573,6 +573,20 @@ out: return result; } +static void +nfs_direct_write_scan_commit_list(struct inode *inode, + struct list_head *list, + struct nfs_commit_info *cinfo) +{ + spin_lock(cinfo->lock); +#ifdef CONFIG_NFS_V4_1 + if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) + NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); +#endif + nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); + spin_unlock(cinfo->lock); +} + static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -582,10 +596,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(failed); nfs_init_cinfo_from_dreq(&cinfo, dreq); - pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); - spin_lock(cinfo.lock); - nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); - spin_unlock(cinfo.lock); + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; get_dreq(dreq); -- cgit v1.2.3 From 6cccbb6f52dceec5f4faed8846ac05ae830640e6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 16 Sep 2014 17:35:51 -0400 Subject: nfs: rename pgio header ds_idx to ds_commit_idx 'ds_commit_idx' is a better name - it is used to select the right commit bucket for pnfs. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e84f764b9dcd..d7c2d430b04d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -112,22 +112,22 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * nfs_direct_select_verf - select the right verifier * @dreq - direct request possibly spanning multiple servers * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs - * @ds_idx - index of data server in data server list, only valid if ds_clp set + * @commit_idx - commit bucket index for the DS * * returns the correct verifier to use given the role of the server */ static struct nfs_writeverf * nfs_direct_select_verf(struct nfs_direct_req *dreq, struct nfs_client *ds_clp, - int ds_idx) + int commit_idx) { struct nfs_writeverf *verfp = &dreq->verf; #ifdef CONFIG_NFS_V4_1 if (ds_clp) { /* pNFS is in use, use the DS verf */ - if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) - verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; + if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) + verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; else WARN_ON_ONCE(1); } @@ -148,8 +148,7 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, - hdr->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +168,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, - hdr->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; -- cgit v1.2.3 From b57ff1303a2d4d1484c7a82bd80a3e014d6cdf5e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 18:20:21 -0400 Subject: pnfs: pass ds_commit_idx through the commit path Pass ds_commit_idx through the nfs commit path. It's used to select the commit bucket when using pnfs and is ignored when not using pnfs. Several functions had to be changed: nfs_retry_commit, nfs_mark_request_commit, pnfs_mark_request_commit and the pnfs layout driver .mark_request_commit functions. Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d7c2d430b04d..1ee41d74c31c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -649,7 +649,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ - nfs_mark_request_commit(req, NULL, &cinfo); + nfs_mark_request_commit(req, NULL, &cinfo, 0); } else nfs_release_request(req); nfs_unlock_and_release_request(req); @@ -748,7 +748,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (request_commit) { kref_get(&req->wb_kref); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + hdr->ds_commit_idx); } nfs_unlock_and_release_request(req); } -- cgit v1.2.3 From a7d42ddb3099727f58366fa006f850a219cce6c8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 10:55:07 -0400 Subject: nfs: add mirroring support to pgio layer This patch adds mirrored write support to the pgio layer. The default is to use one mirror, but pgio callers may define callbacks to change this to any value up to the (arbitrarily selected) limit of 16. The basic idea is to break out members of nfs_pageio_descriptor that cannot be shared between mirrored DSes and put them in a new structure. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ee41d74c31c..0178d4fe8ab7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else - dreq->count += hdr->good_bytes; + else { + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + } spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - dreq->count += hdr->good_bytes; + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; -- cgit v1.2.3 From 0a00b77b331a0e4aac461d4e920677661256918a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 12:48:33 -0400 Subject: nfs: mirroring support for direct io The current mirroring code only notices short writes to the first mirror. This patch keeps per-mirror byte counts and only considers a byte to be written once all mirrors report so. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 71 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 14 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0178d4fe8ab7..651387bbfd9f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep; /* * This represents a set of asynchronous requests that we're waiting on */ +struct nfs_direct_mirror { + ssize_t count; +}; + struct nfs_direct_req { struct kref kref; /* release manager */ @@ -78,6 +82,10 @@ struct nfs_direct_req { /* completion state */ atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ + + struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; + int mirror_count; + ssize_t count, /* bytes actually processed */ bytes_left, /* bytes left to be sent */ error; /* any reported error */ @@ -108,6 +116,29 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +static void +nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) +{ + int i; + ssize_t count; + + WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); + + dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; + + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + + /* update the dreq->count by finding the minimum agreed count from all + * mirrors */ + count = dreq->mirrors[0].count; + + for (i = 1; i < dreq->mirror_count; i++) + count = min(count, dreq->mirrors[i].count); + + dreq->count = count; +} + /* * nfs_direct_select_verf - select the right verifier * @dreq - direct request possibly spanning multiple servers @@ -241,6 +272,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, cinfo->completion_ops = &nfs_direct_commit_completion_ops; } +static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, + struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + int mirror_count = 1; + + if (pgio->pg_ops->pg_get_mirror_count) + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + + dreq->mirror_count = mirror_count; +} + static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -255,6 +298,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->mds_cinfo.list); dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); + dreq->mirror_count = 1; spin_lock_init(&dreq->lock); return dreq; @@ -360,14 +404,9 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else { - /* - * FIXME: right now this only accounts for bytes written - * to the first mirror - */ - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; - } + else + nfs_direct_good_bytes(dreq, hdr); + spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -598,17 +637,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(reqs); struct nfs_commit_info cinfo; LIST_HEAD(failed); + int i; nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; + for (i = 0; i < dreq->mirror_count; i++) + dreq->mirrors[i].count = 0; get_dreq(dreq); nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; + req = nfs_list_entry(reqs.next); + nfs_direct_setup_mirroring(dreq, &desc, req); + list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { nfs_list_remove_request(req); @@ -730,12 +775,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - /* - * FIXME: right now this only accounts for bytes written - * to the first mirror - */ - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; + nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; @@ -841,6 +881,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, result = PTR_ERR(req); break; } + + nfs_direct_setup_mirroring(dreq, &desc, req); + nfs_lock_request(req); req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; -- cgit v1.2.3 From 80c76fe314c2859d5aac94b5d66d2b9895aa73d4 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Oct 2014 12:58:25 -0400 Subject: pnfs: fail comparison when bucket verifier not set This skips the WARN_ON_ONCE, but doesnt change behavior (the memcmp would fail). Signed-off-by: Weston Andros Adamson Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 651387bbfd9f..eb814789f700 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -222,7 +222,11 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, verfp = nfs_direct_select_verf(dreq, data->ds_clp, data->ds_commit_index); - WARN_ON_ONCE(verfp->committed < 0); + + /* verifier not set so always fail */ + if (verfp->committed < 0) + return 1; + return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } -- cgit v1.2.3 From 012fa16dca0da6c487dd066829ff0b0954925fe6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:21 +0800 Subject: nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes To allow pnfs LD to ask direct writes to be resend. Signed-off-by: Peng Tao --- fs/nfs/direct.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index eb814789f700..4fad6b727eb4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -116,6 +116,12 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) +{ + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +} +EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); + static void nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) { -- cgit v1.2.3 From 5fadeb47dcc5c30d4b6cf481b4a78689eab59443 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 19 Jan 2015 12:41:16 +0800 Subject: nfs: count DIO good bytes correctly with mirroring When resending to MDS, we might resend multiple mirroring requests to MDS. As a result, nfs_direct_good_bytes() ends up counting bytes multiple times, causing application to get wrong return results in read/write syscalls. Fix it by tracking start of a dreq and checking the range of pgio header. Cc: Weston Andros Adamson Signed-off-by: Peng Tao --- fs/nfs/direct.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4fad6b727eb4..3715b4957abc 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -88,6 +88,7 @@ struct nfs_direct_req { ssize_t count, /* bytes actually processed */ bytes_left, /* bytes left to be sent */ + io_start, /* start of IO */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -130,10 +131,11 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); - dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; - - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; + count = dreq->mirrors[hdr->pgio_mirror_idx].count; + if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { + count = hdr->io_start + hdr->good_bytes - dreq->io_start; + dreq->mirrors[hdr->pgio_mirror_idx].count = count; + } /* update the dreq->count by finding the minimum agreed count from all * mirrors */ @@ -594,6 +596,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, dreq->inode = inode; dreq->bytes_left = count; + dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -1002,6 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, dreq->inode = inode; dreq->bytes_left = count; + dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { -- cgit v1.2.3 From f4086a3d789dbe18949862276d83b8f49fce6d2f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Feb 2015 21:03:16 -0500 Subject: NFS: struct nfs_commit_info.lock must always point to inode->i_lock Commit 411a99adffb4f (nfs: clear_request_commit while holding i_lock) assumes that the nfs_commit_info always points to the inode->i_lock. For historical reasons, that is not the case for O_DIRECT writes. Cc: Weston Andros Adamson Fixes: 411a99adffb4f ("nfs: clear_request_commit while holding i_lock") Cc: stable@vger.kernel.org # 3.17.x Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7077521acdf4..e907c8cf732e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -283,7 +283,7 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages) void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq) { - cinfo->lock = &dreq->lock; + cinfo->lock = &dreq->inode->i_lock; cinfo->mds = &dreq->mds_cinfo; cinfo->ds = &dreq->ds_cinfo; cinfo->dreq = dreq; -- cgit v1.2.3