From 5263e31e452fb84138b9bee061d5c06c0f359fea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: move flock locks to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/nfs/write.c | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..e072aeb34195 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1113,6 +1113,11 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } + if (l_ctx && ctx->dentry->d_inode->i_flctx && + !list_empty_careful(&ctx->dentry->d_inode->i_flctx->flc_flock)) { + do_flush |= l_ctx->lockowner.l_owner != current->files + || l_ctx->lockowner.l_pid != current->tgid; + } nfs_release_request(req); if (!do_flush) return 0; @@ -1170,6 +1175,13 @@ out: return PageUptodate(page) != 0; } +static bool +is_whole_file_wrlock(struct file_lock *fl) +{ + return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && + fl->fl_type == F_WRLCK; +} + /* If we know the page is up to date, and we're not using byte range locks (or * if we have the whole file locked for writing), it may be more efficient to * extend the write to cover the entire page in order to avoid fragmentation @@ -1180,17 +1192,38 @@ out: */ static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) { + int ret; + struct file_lock_context *flctx = inode->i_flctx; + struct file_lock *fl; + if (file->f_flags & O_DSYNC) return 0; if (!nfs_write_pageuptodate(page, inode)) return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && - inode->i_flock->fl_end == OFFSET_MAX && - inode->i_flock->fl_type != F_RDLCK)) - return 1; - return 0; + if (!inode->i_flock && !flctx) + return 0; + + /* Check to see if there are whole file write locks */ + spin_lock(&inode->i_lock); + ret = 0; + + fl = inode->i_flock; + if (fl && is_whole_file_wrlock(fl)) { + ret = 1; + goto out; + } + + if (!list_empty(&flctx->flc_flock)) { + fl = list_first_entry(&flctx->flc_flock, struct file_lock, + fl_list); + if (fl->fl_type == F_WRLCK) + ret = 1; + } +out: + spin_unlock(&inode->i_lock); + return ret; } /* -- cgit v1.2.3 From bd61e0a9c852de2d705b6f1bb2cc54c5774db570 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: convert posix locks to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/nfs/write.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e072aeb34195..784c13485b3f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; + struct file_lock_context *flctx = file_inode(file)->i_flctx; struct nfs_page *req; int do_flush, status; /* @@ -1109,12 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush = req->wb_page != page || req->wb_context != ctx; /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; - if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { - do_flush |= l_ctx->lockowner.l_owner != current->files - || l_ctx->lockowner.l_pid != current->tgid; - } - if (l_ctx && ctx->dentry->d_inode->i_flctx && - !list_empty_careful(&ctx->dentry->d_inode->i_flctx->flc_flock)) { + if (l_ctx && flctx && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock))) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } @@ -1202,26 +1200,24 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (!inode->i_flock && !flctx) + if (!flctx || (list_empty_careful(&flctx->flc_flock) && + list_empty_careful(&flctx->flc_posix))) return 0; /* Check to see if there are whole file write locks */ - spin_lock(&inode->i_lock); ret = 0; - - fl = inode->i_flock; - if (fl && is_whole_file_wrlock(fl)) { - ret = 1; - goto out; - } - - if (!list_empty(&flctx->flc_flock)) { + spin_lock(&inode->i_lock); + if (!list_empty(&flctx->flc_posix)) { + fl = list_first_entry(&flctx->flc_posix, struct file_lock, + fl_list); + if (is_whole_file_wrlock(fl)) + ret = 1; + } else if (!list_empty(&flctx->flc_flock)) { fl = list_first_entry(&flctx->flc_flock, struct file_lock, fl_list); if (fl->fl_type == F_WRLCK) ret = 1; } -out: spin_unlock(&inode->i_lock); return ret; } -- cgit v1.2.3 From 6109c85037e53443f29fd39c0de69f578a1cf285 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:57 -0500 Subject: locks: add a dedicated spinlock to protect i_flctx lists We can now add a dedicated spinlock without expanding struct inode. Change to using that to protect the various i_flctx lists. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/nfs/write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 784c13485b3f..4ae66f416eb9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1206,7 +1206,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino /* Check to see if there are whole file write locks */ ret = 0; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); if (!list_empty(&flctx->flc_posix)) { fl = list_first_entry(&flctx->flc_posix, struct file_lock, fl_list); @@ -1218,7 +1218,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino if (fl->fl_type == F_WRLCK) ret = 1; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); return ret; } -- cgit v1.2.3 From de1414a654e66b81b5348dbc5259ecf2fb61655e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:36 +0100 Subject: fs: export inode_to_bdi and use it in favor of mapping->backing_dev_info Now that we got rid of the bdi abuse on character devices we can always use sb->s_bdi to get at the backing_dev_info for a file, except for the block device special case. Export inode_to_bdi and replace uses of mapping->backing_dev_info with it to prepare for the removal of mapping->backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/nfs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..298abcc5281b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -786,7 +786,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -853,7 +853,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(inode_to_bdi(page_file_mapping(page)->host), BDI_RECLAIMABLE); } /* Called holding inode (/cinfo) lock */ @@ -1564,7 +1564,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + dec_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); -- cgit v1.2.3 From abde71f4d3c027a30f8d725e1e22001313b4481a Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 9 Jun 2014 13:12:20 -0700 Subject: pnfs: Add nfs_rpc_ops in calls to nfs_initiate_pgio Signed-off-by: Tom Haynes --- fs/nfs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..54d4857e0e2b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1240,15 +1240,15 @@ static int flush_task_priority(int how) static void nfs_initiate_write(struct nfs_pgio_header *hdr, struct rpc_message *msg, + const struct nfs_rpc_ops *rpc_ops, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(hdr, msg); + rpc_ops->write_setup(hdr, msg); - nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, + nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, &task_setup_data->rpc_client, msg, hdr); } -- cgit v1.2.3 From c36aae9ad95afa2f9a9e9109d989c21af221fabd Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 9 Jun 2014 07:10:14 +0800 Subject: nfs: allow different protocol in nfs_initiate_commit pnfs flexfile layout client may want to use NFSv3 ops rather than the default MDS v4 ops. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/write.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 54d4857e0e2b..8800bd3b235d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1465,6 +1465,7 @@ void nfs_commitdata_release(struct nfs_commit_data *data) EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1486,7 +1487,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .priority = priority, }; /* Set up the initial task struct. */ - NFS_PROTO(data->inode)->commit_setup(data, &msg); + nfs_ops->commit_setup(data, &msg); dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); @@ -1589,8 +1590,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); - return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, - how, 0); + return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), + data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo); cinfo->completion_ops->error_cleanup(NFS_I(inode)); -- cgit v1.2.3 From 309a1d65b11de24d172f7dbbc21583ebd649c912 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 16:34:29 -0400 Subject: nfs: handle overlapping reqs in lock_and_join This is needed for mirrored DS support, where multuple requests cover the same range. Signed-off-by: Weston Andros Adamson --- fs/nfs/write.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8800bd3b235d..e9974574b19a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -473,13 +473,18 @@ try_again: do { /* * Subrequests are always contiguous, non overlapping - * and in order. If not, it's a programming error. + * and in order - but may be repeated (mirrored writes). */ - WARN_ON_ONCE(subreq->wb_offset != - (head->wb_offset + total_bytes)); - - /* keep track of how many bytes this group covers */ - total_bytes += subreq->wb_bytes; + if (subreq->wb_offset == (head->wb_offset + total_bytes)) { + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || + ((subreq->wb_offset + subreq->wb_bytes) > + (head->wb_offset + total_bytes)))) { + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + return ERR_PTR(-EIO); + } if (!nfs_lock_request(subreq)) { /* releases page group bit lock and -- cgit v1.2.3 From b57ff1303a2d4d1484c7a82bd80a3e014d6cdf5e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 18:20:21 -0400 Subject: pnfs: pass ds_commit_idx through the commit path Pass ds_commit_idx through the nfs commit path. It's used to select the commit bucket when using pnfs and is ignored when not using pnfs. Several functions had to be changed: nfs_retry_commit, nfs_mark_request_commit, pnfs_mark_request_commit and the pnfs layout driver .mark_request_commit functions. Signed-off-by: Tom Haynes --- fs/nfs/write.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e9974574b19a..2bee165fddcf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -847,9 +847,9 @@ EXPORT_SYMBOL_GPL(nfs_init_cinfo); */ void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { - if (pnfs_mark_request_commit(req, lseg, cinfo)) + if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) return; nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); } @@ -905,7 +905,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) } if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + 0); goto next; } remove_req: @@ -1560,14 +1561,15 @@ EXPORT_SYMBOL_GPL(nfs_init_commit); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req, lseg, cinfo); + nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, @@ -1598,7 +1600,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, 0); out_bad: - nfs_retry_commit(head, NULL, cinfo); + nfs_retry_commit(head, NULL, cinfo, 0); cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } -- cgit v1.2.3 From a7d42ddb3099727f58366fa006f850a219cce6c8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 10:55:07 -0400 Subject: nfs: add mirroring support to pgio layer This patch adds mirrored write support to the pgio layer. The default is to use one mirror, but pgio callers may define callbacks to change this to any value up to the (arbitrarily selected) limit of 16. The basic idea is to break out members of nfs_pageio_descriptor that cannot be shared between mirrored DSes and put them in a new structure. Signed-off-by: Weston Andros Adamson --- fs/nfs/write.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2bee165fddcf..ceacfeeb28c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, - 0); + hdr->pgio_mirror_idx); goto next; } remove_req: @@ -1304,8 +1304,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; + + nfs_pageio_stop_mirroring(pgio); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -- cgit v1.2.3 From d15bc38df607c893c36f4962dca0f57174c6a5c9 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Fri, 13 Feb 2015 13:19:53 -0800 Subject: nfs: Provide and use helper functions for marking a page as unstable Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 88a6d2196ece..76c278acaefc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -789,13 +789,8 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, nfs_list_add_request(req, dst); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - if (!cinfo->dreq) { - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, - I_DIRTY_DATASYNC); - } + if (!cinfo->dreq) + nfs_mark_page_unstable(req->wb_page); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); -- cgit v1.2.3 From 487b9b8afde60986b606b3ee05169fb893adc153 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Fri, 13 Feb 2015 13:19:54 -0800 Subject: nfs: Can call nfs_clear_page_commit() instead Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 76c278acaefc..595d81e354d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1600,11 +1600,8 @@ void nfs_retry_commit(struct list_head *page_list, req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); - if (!cinfo->dreq) { - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - } + if (!cinfo->dreq) + nfs_clear_page_commit(req->wb_page); nfs_unlock_and_release_request(req); } } -- cgit v1.2.3