From 4177af3a8a6f119484c7903845c6693d7381c13e Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Mon, 23 Jan 2012 17:31:43 +0000 Subject: Define new macro XFS_ALL_QUOTA_ACTIVE and simply some usage Define new macro XFS_ALL_QUOTA_ACTIVE and simply some usage of quota macros. Signed-off-by: Chandra Seetharaman Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ee5b695c99a7..5e0d43f231a4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -324,10 +324,9 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { mp->m_flags |= XFS_MOUNT_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); + mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; + mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; + mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || !strcmp(this_char, MNTOPT_USRQUOTA)) { -- cgit v1.2.3 From aa6bf01d391935a8929333bc2e243084ea0c58db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Feb 2012 09:53:48 +0000 Subject: xfs: use per-filesystem I/O completion workqueues The new concurrency managed workqueues are cheap enough that we can create per-filesystem instead of global workqueues. This allows us to remove the trylock or defer scheme on the ilock, which is not helpful once we have outstanding log reservations until finishing a size update. Also allow the default concurrency on this workqueues so that I/O completions blocking on the ilock for one inode do not block process for another inode. Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5e0d43f231a4..c7f7bc2855a4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -759,6 +759,36 @@ xfs_setup_devices( return 0; } +STATIC int +xfs_init_mount_workqueues( + struct xfs_mount *mp) +{ + mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", + WQ_MEM_RECLAIM, 0, mp->m_fsname); + if (!mp->m_data_workqueue) + goto out; + + mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", + WQ_MEM_RECLAIM, 0, mp->m_fsname); + if (!mp->m_unwritten_workqueue) + goto out_destroy_data_iodone_queue; + + return 0; + +out_destroy_data_iodone_queue: + destroy_workqueue(mp->m_data_workqueue); +out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_mount_workqueues( + struct xfs_mount *mp) +{ + destroy_workqueue(mp->m_data_workqueue); + destroy_workqueue(mp->m_unwritten_workqueue); +} + /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -982,6 +1012,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); xfs_icsb_destroy_counters(mp); + xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); xfs_free_fsname(mp); kfree(mp); @@ -1308,10 +1339,14 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - error = xfs_icsb_init_counters(mp); + error = xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; + error = xfs_icsb_init_counters(mp); + if (error) + goto out_destroy_workqueues; + error = xfs_readsb(mp, flags); if (error) goto out_destroy_counters; @@ -1374,6 +1409,8 @@ xfs_fs_fill_super( xfs_freesb(mp); out_destroy_counters: xfs_icsb_destroy_counters(mp); +out_destroy_workqueues: + xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); out_free_fsname: -- cgit v1.2.3 From 8a9c9980f24f6d86e0ec0150ed35fba45d0c9f88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Feb 2012 09:53:52 +0000 Subject: xfs: log timestamp updates Timestamps on regular files are the last metadata that XFS does not update transactionally. Now that we use the delaylog mode exclusively and made the log scode scale extremly well there is no need to bypass that code for timestamp updates. Logging all updates allows to drop a lot of code, and will allow for further performance improvements later on. Note that this patch drops optimized handling of fdatasync - it will be added back in a separate commit. Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 108 ++++++++++++++++++----------------------------------- 1 file changed, 37 insertions(+), 71 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c7f7bc2855a4..e602c8c67c5c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -863,91 +863,58 @@ xfs_fs_inode_init_once( } /* - * Dirty the XFS inode when mark_inode_dirty_sync() is called so that - * we catch unlogged VFS level updates to the inode. + * This is called by the VFS when dirtying inode metadata. This can happen + * for a few reasons, but we only care about timestamp updates, given that + * we handled the rest ourselves. In theory no other calls should happen, + * but for example generic_write_end() keeps dirtying the inode after + * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC, + * and skip this call otherwise. * - * We need the barrier() to maintain correct ordering between unlogged - * updates and the transaction commit code that clears the i_update_core - * field. This requires all updates to be completed before marking the - * inode dirty. + * We'll hopefull get a different method just for updating timestamps soon, + * at which point this hack can go away, and maybe we'll also get real + * error handling here. */ STATIC void xfs_fs_dirty_inode( - struct inode *inode, - int flags) -{ - barrier(); - XFS_I(inode)->i_update_core = 1; -} - -STATIC int -xfs_fs_write_inode( struct inode *inode, - struct writeback_control *wbc) + int flags) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = EAGAIN; - - trace_xfs_write_inode(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) { - /* - * Make sure the inode has made it it into the log. Instead - * of forcing it all the way to stable storage using a - * synchronous transaction we let the log force inside the - * ->sync_fs call do that for thus, which reduces the number - * of synchronous log forces dramatically. - */ - error = xfs_log_dirty_inode(ip, NULL, 0); - if (error) - goto out; - return 0; - } else { - if (!ip->i_update_core) - return 0; + struct xfs_trans *tp; + int error; - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by - * xfs_sync. - */ - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) - goto out; + if (flags != I_DIRTY_SYNC) + return; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + trace_xfs_dirty_inode(ip); - /* - * Now we have the flush lock and the inode is not pinned, we - * can check if the inode is really clean as we know that - * there are no pending transaction completions, it is not - * waiting on the delayed write queue and there is no IO in - * progress. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - error = 0; - goto out_unlock; - } - error = xfs_iflush(ip, SYNC_TRYLOCK); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + goto trouble; } - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - out: + xfs_ilock(ip, XFS_ILOCK_EXCL); /* - * if we failed to write out the inode then mark - * it dirty again so we'll try again later. + * Grab all the latest timestamps from the Linux inode. */ + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); if (error) - xfs_mark_inode_dirty_sync(ip); - return -error; + goto trouble; + return; + +trouble: + xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino); } STATIC void @@ -1466,7 +1433,6 @@ static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, - .write_inode = xfs_fs_write_inode, .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, -- cgit v1.2.3 From 8f639ddea0c4978ae9b4e46ea041c9e5afe0ee8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Feb 2012 09:53:55 +0000 Subject: xfs: reimplement fdatasync support Add an in-memory only flag to say we logged timestamps only, and use it to check if fdatasync can optimize away the log force. Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e602c8c67c5c..e9ad7894648e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -907,7 +907,7 @@ xfs_fs_dirty_inode( ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); error = xfs_trans_commit(tp, 0); if (error) goto trouble; -- cgit v1.2.3 From a05931ceb0160deadbd7798d60d01b17f2d81b09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2012 08:52:37 +0000 Subject: xfs: remove the global xfs_Gqm structure If we initialize the slab caches for the quota code when XFS is loaded there is no need for a global and reference counted quota manager structure. Drop all this overhead and also fix the error handling during quota initialization. Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e9ad7894648e..06d23b976f4c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1654,13 +1654,17 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; - vfs_initquota(); + error = xfs_qm_init(); + if (error) + goto out_sysctl_unregister; error = register_filesystem(&xfs_fs_type); if (error) - goto out_sysctl_unregister; + goto out_qm_exit; return 0; + out_qm_exit: + xfs_qm_exit(); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: @@ -1682,7 +1686,7 @@ init_xfs_fs(void) STATIC void __exit exit_xfs_fs(void) { - vfs_exitquota(); + xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); xfs_sysctl_unregister(); xfs_cleanup_procfs(); -- cgit v1.2.3 From 8de52778798fe39660a8d6b26f290e0c93202761 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Feb 2012 12:45:27 -0500 Subject: vfs: check i_nlink limits in vfs_{mkdir,rename_dir,link} New field of struct super_block - ->s_max_links. Maximal allowed value of ->i_nlink or 0; in the latter case all checks still need to be done in ->link/->mkdir/->rename instances. Note that this limit applies both to directoris and to non-directories. Signed-off-by: Al Viro --- fs/xfs/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ee5b695c99a7..0e4c5c017fba 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1341,6 +1341,7 @@ xfs_fs_fill_super( sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; set_posix_acl_flag(sb); -- cgit v1.2.3 From 48fde701aff662559b38d9a609574068f22d00fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2012 22:15:13 -0500 Subject: switch open-coded instances of d_make_root() to new helper Signed-off-by: Al Viro --- fs/xfs/xfs_super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0e4c5c017fba..baf40e378d35 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1362,10 +1362,10 @@ xfs_fs_fill_super( error = EINVAL; goto out_syncd_stop; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { error = ENOMEM; - goto out_iput; + goto out_syncd_stop; } return 0; @@ -1384,8 +1384,6 @@ xfs_fs_fill_super( out: return -error; - out_iput: - iput(root); out_syncd_stop: xfs_syncd_stop(mp); out_unmount: -- cgit v1.2.3 From c999a223c2f0d31c64ef7379814cea1378b2b800 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Mar 2012 05:15:07 +0000 Subject: xfs: introduce an allocation workqueue We currently have significant issues with the amount of stack that allocation in XFS uses, especially in the writeback path. We can easily consume 4k of stack between mapping the page, manipulating the bmap btree and allocating blocks from the free list. Not to mention btree block readahead and other functionality that issues IO in the allocation path. As a result, we can no longer fit allocation in the writeback path in the stack space provided on x86_64. To alleviate this problem, introduce an allocation workqueue and move all allocations to a seperate context. This can be easily added as an interposing layer into xfs_alloc_vextent(), which takes a single argument structure and does not return until the allocation is complete or has failed. To do this, add a work structure and a completion to the allocation args structure. This allows xfs_alloc_vextent to queue the args onto the workqueue and wait for it to be completed by the worker. This can be done completely transparently to the caller. The worker function needs to ensure that it sets and clears the PF_TRANS flag appropriately as it is being run in an active transaction context. Work can also be queued in a memory reclaim context, so a rescuer is needed for the workqueue. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 06d23b976f4c..5484888d39c4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1607,12 +1607,28 @@ xfs_init_workqueues(void) xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); if (!xfs_syncd_wq) return -ENOMEM; + + /* + * The allocation workqueue can be used in memory reclaim situations + * (writepage path), and parallelism is only limited by the number of + * AGs in all the filesystems mounted. Hence use the default large + * max_active value for this workqueue. + */ + xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); + if (!xfs_alloc_wq) + goto out_destroy_syncd; + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); + return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { + destroy_workqueue(xfs_alloc_wq); destroy_workqueue(xfs_syncd_wq); } -- cgit v1.2.3 From 5132ba8f2b7705fb6b06fa6ad3d009233c816b67 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Mar 2012 05:15:10 +0000 Subject: xfs: don't cache inodes read through bulkstat When we read inodes via bulkstat, we generally only read them once and then throw them away - they never get used again. If we retain them in cache, then it simply causes the working set of inodes and other cached items to be reclaimed just so the inode cache can grow. Avoid this problem by marking inodes read by bulkstat not to be cached and check this flag in .drop_inode to determine whether the inode should be added to the VFS LRU or not. If the inode lookup hits an already cached inode, then don't set the flag. If the inode lookup hits an inode marked with no cache flag, remove the flag and allow it to be cached once the current reference goes away. Inodes marked as not cached will get cleaned up by the background inode reclaim or via memory pressure, so they will still generate some short term cache pressure. They will, however, be reclaimed much sooner and in preference to cache hot inodes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5484888d39c4..e1c623b43ab2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -950,6 +950,22 @@ xfs_fs_evict_inode( xfs_inactive(ip); } +/* + * We do an unlocked check for XFS_IDONTCACHE here because we are already + * serialised against cache hits here via the inode->i_lock and igrab() in + * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be + * racing with us, and it avoids needing to grab a spinlock here for every inode + * we drop the final reference on. + */ +STATIC int +xfs_fs_drop_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + + return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); +} + STATIC void xfs_free_fsname( struct xfs_mount *mp) @@ -1434,6 +1450,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .evict_inode = xfs_fs_evict_inode, + .drop_inode = xfs_fs_drop_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, -- cgit v1.2.3