From b574480507460b8e31b8d38dd4642219fc3b9a10 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 20 Jun 2009 23:34:44 -0400
Subject: jbd2: Remove GFP_ATOMIC kmalloc from inside spinlock critical region

Fix jbd2_dev_to_name(), a function used when pretty-printting jbd2 and
ext4 tracepoints.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..7b545c3b3942 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2410,6 +2410,7 @@ const char *jbd2_dev_to_name(dev_t device)
 	int	i = hash_32(device, CACHE_SIZE_BITS);
 	char	*ret;
 	struct block_device *bd;
+	static struct devname_cache *new_dev;
 
 	rcu_read_lock();
 	if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2420,20 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	rcu_read_unlock();
 
+	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+	if (!new_dev)
+		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
+			kfree(new_dev);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
 		}
 		call_rcu(&devcache[i]->rcu, free_devcache);
 	}
-	devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
-	if (!devcache[i]) {
-		spin_unlock(&devname_cache_lock);
-		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
-	}
+	devcache[i] = new_dev;
 	devcache[i]->device = device;
 	bd = bdget(device);
 	if (bd) {
-- 
cgit v1.2.3


From f4a01017d678fe4baecf480e79d7c4f4b7ebc772 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 5 Jul 2009 22:08:16 -0400
Subject: ext4: Fix potential reclaim deadlock when truncating partial block

The ext4_block_truncate_page() function previously called
grab_cache_page(), which called find_or_create_page() with the
__GFP_FS flag potentially set.  This could cause a deadlock if the
system is low on memory and it attempts a memory reclaim, which could
potentially call back into ext4.  So we need to call
find_or_create_page() directly, and remove the __GFP_FP flag to avoid
this potential deadlock.

Thanks to Roland Dreier for reporting a lockdep warning which showed
this problem.

[20786.363249] =================================
[20786.363257] [ INFO: inconsistent lock state ]
[20786.363265] 2.6.31-2-generic #14~rbd4gitd960eea9
[20786.363270] ---------------------------------
[20786.363276] inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage.
[20786.363285] http/8397 [HC0[0]:SC0[0]:HE1:SE1] takes:
[20786.363291]  (jbd2_handle){+.+.?.}, at: [<ffffffff812008bb>] jbd2_journal_start+0xdb/0x150
[20786.363314] {IN-RECLAIM_FS-W} state was registered at:
[20786.363320]   [<ffffffff8108bef6>] mark_irqflags+0xc6/0x1a0
[20786.363334]   [<ffffffff8108d347>] __lock_acquire+0x287/0x430
[20786.363345]   [<ffffffff8108d595>] lock_acquire+0xa5/0x150
[20786.363355]   [<ffffffff812008da>] jbd2_journal_start+0xfa/0x150
[20786.363365]   [<ffffffff811d98a8>] ext4_journal_start_sb+0x58/0x90
[20786.363377]   [<ffffffff811cce85>] ext4_delete_inode+0xc5/0x2c0
[20786.363389]   [<ffffffff81146fa3>] generic_delete_inode+0xd3/0x1a0
[20786.363401]   [<ffffffff81147095>] generic_drop_inode+0x25/0x30
[20786.363411]   [<ffffffff81145ce2>] iput+0x62/0x70
[20786.363420]   [<ffffffff81142878>] dentry_iput+0x98/0x110
[20786.363429]   [<ffffffff81142a00>] d_kill+0x50/0x80
[20786.363438]   [<ffffffff811444c5>] dput+0x95/0x180
[20786.363447]   [<ffffffff8120de4b>] ecryptfs_d_release+0x2b/0x70
[20786.363459]   [<ffffffff81142978>] d_free+0x28/0x60
[20786.363468]   [<ffffffff81142a18>] d_kill+0x68/0x80
[20786.363477]   [<ffffffff81142ad3>] prune_one_dentry+0xa3/0xc0
[20786.363487]   [<ffffffff81142d61>] __shrink_dcache_sb+0x271/0x290
[20786.363497]   [<ffffffff81142e89>] prune_dcache+0x109/0x1b0
[20786.363506]   [<ffffffff81142f6f>] shrink_dcache_memory+0x3f/0x50
[20786.363516]   [<ffffffff810f6d3d>] shrink_slab+0x12d/0x190
[20786.363527]   [<ffffffff810f97d7>] balance_pgdat+0x4d7/0x640
[20786.363537]   [<ffffffff810f9a57>] kswapd+0x117/0x170
[20786.363546]   [<ffffffff810773ce>] kthread+0x9e/0xb0
[20786.363558]   [<ffffffff8101430a>] child_rip+0xa/0x20
[20786.363569]   [<ffffffffffffffff>] 0xffffffffffffffff
[20786.363598] irq event stamp: 15997
[20786.363603] hardirqs last  enabled at (15997): [<ffffffff81125f9d>] kmem_cache_alloc+0xfd/0x1a0
[20786.363617] hardirqs last disabled at (15996): [<ffffffff81125f01>] kmem_cache_alloc+0x61/0x1a0
[20786.363628] softirqs last  enabled at (15966): [<ffffffff810631ea>] __do_softirq+0x14a/0x220
[20786.363641] softirqs last disabled at (15861): [<ffffffff8101440c>] call_softirq+0x1c/0x30
[20786.363651]
[20786.363653] other info that might help us debug this:
[20786.363660] 3 locks held by http/8397:
[20786.363665]  #0:  (&sb->s_type->i_mutex_key#8){+.+.+.}, at: [<ffffffff8112ed24>] do_truncate+0x64/0x90
[20786.363685]  #1:  (&sb->s_type->i_alloc_sem_key#5){+++++.}, at: [<ffffffff81147f90>] notify_change+0x250/0x350
[20786.363707]  #2:  (jbd2_handle){+.+.?.}, at: [<ffffffff812008bb>] jbd2_journal_start+0xdb/0x150
[20786.363724]
[20786.363726] stack backtrace:
[20786.363734] Pid: 8397, comm: http Tainted: G         C 2.6.31-2-generic #14~rbd4gitd960eea9
[20786.363741] Call Trace:
[20786.363752]  [<ffffffff8108ad7c>] print_usage_bug+0x18c/0x1a0
[20786.363763]  [<ffffffff8108b0c0>] ? check_usage_backwards+0x0/0xb0
[20786.363773]  [<ffffffff8108bad2>] mark_lock_irq+0xf2/0x280
[20786.363783]  [<ffffffff8108bd97>] mark_lock+0x137/0x1d0
[20786.363793]  [<ffffffff8108c03c>] mark_held_locks+0x6c/0xa0
[20786.363803]  [<ffffffff8108c11f>] lockdep_trace_alloc+0xaf/0xe0
[20786.363813]  [<ffffffff810efbac>] __alloc_pages_nodemask+0x7c/0x180
[20786.363824]  [<ffffffff810e9411>] ? find_get_page+0x91/0xf0
[20786.363835]  [<ffffffff8111d3b7>] alloc_pages_current+0x87/0xd0
[20786.363845]  [<ffffffff810e9827>] __page_cache_alloc+0x67/0x70
[20786.363856]  [<ffffffff810eb7df>] find_or_create_page+0x4f/0xb0
[20786.363867]  [<ffffffff811cb3be>] ext4_block_truncate_page+0x3e/0x460
[20786.363876]  [<ffffffff812008da>] ? jbd2_journal_start+0xfa/0x150
[20786.363885]  [<ffffffff812008bb>] ? jbd2_journal_start+0xdb/0x150
[20786.363895]  [<ffffffff811c6415>] ? ext4_meta_trans_blocks+0x75/0xf0
[20786.363905]  [<ffffffff811e8d8b>] ext4_ext_truncate+0x1bb/0x1e0
[20786.363916]  [<ffffffff811072c5>] ? unmap_mapping_range+0x75/0x290
[20786.363926]  [<ffffffff811ccc28>] ext4_truncate+0x498/0x630
[20786.363938]  [<ffffffff8129b4ce>] ? _raw_spin_unlock+0x5e/0xb0
[20786.363947]  [<ffffffff81107306>] ? unmap_mapping_range+0xb6/0x290
[20786.363957]  [<ffffffff8108c3ad>] ? trace_hardirqs_on+0xd/0x10
[20786.363966]  [<ffffffff811ffe58>] ? jbd2_journal_stop+0x1f8/0x2e0
[20786.363976]  [<ffffffff81107690>] vmtruncate+0xb0/0x110
[20786.363986]  [<ffffffff81147c05>] inode_setattr+0x35/0x170
[20786.363995]  [<ffffffff811c9906>] ext4_setattr+0x186/0x370
[20786.364005]  [<ffffffff81147eab>] notify_change+0x16b/0x350
[20786.364014]  [<ffffffff8112ed30>] do_truncate+0x70/0x90
[20786.364021]  [<ffffffff8112f48b>] T.657+0xeb/0x110
[20786.364021]  [<ffffffff8112f4be>] sys_ftruncate+0xe/0x10
[20786.364021]  [<ffffffff81013132>] system_call_fastpath+0x16/0x1b

Reported-by: Roland Dreier <roland@digitalvampire.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 60a26f3a6f8b..9760ba09275e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3583,7 +3583,8 @@ int ext4_block_truncate_page(handle_t *handle,
 	struct page *page;
 	int err = 0;
 
-	page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT);
+	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+				   mapping_gfp_mask(mapping) & ~__GFP_FS);
 	if (!page)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 089ceecc1ea4a69ed8bcc5c7c7b96ce487e26b33 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sun, 5 Jul 2009 22:17:31 -0400
Subject: ext4: mark several more functions in mballoc.c as noinline

Ted noticed a stack-deep callchain through
writepages->ext4_mb_regular_allocator->ext4_mb_init_cache->submit_bh ...

With all the static functions in mballoc.c, gcc helpfully
inlines for us, and we get something like this:

ext4_mb_regular_allocator	(232 bytes stack)
	ext4_mb_init_cache	(232 bytes stack)
		submit_bh	(starts 464 deeper)

the 2 ext4 functions here get several others inlined; by telling
gcc not to inline them, we can save stack space for when we
head off into submit_bh land and associated block layer callchains.
The following noinlined functions are only called once, so this
won't impact any other callchains:

ext4_mb_regular_allocator 			(104) (was 232)
	ext4_mb_find_by_goal			 (56) (noinlined)
	ext4_mb_init_group			 (24) (noinlined)
		ext4_mb_init_cache		(136) (was 232)
			ext4_mb_generate_buddy	 (88) (noinlined)
			ext4_mb_generate_from_pa (40) (noinlined)
			submit_bh
	ext4_mb_simple_scan_group		 (24) (noinlined)
	ext4_mb_scan_aligned			 (56) (noinlined)
	ext4_mb_complex_scan_group		 (40) (noinlined)
	ext4_mb_try_best_found			 (24) (noinlined)

now when we head off into submit_bh() we're only 264 bytes deeper
in stack than when we entered ext4_mb_regular_allocator()
(vs. 464 bytes before).  Every 200 bytes helps.  :)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519a0a686d94..4a45efabb203 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 	}
 }
 
-static void ext4_mb_generate_buddy(struct super_block *sb,
+static noinline_for_stack
+void ext4_mb_generate_buddy(struct super_block *sb,
 				void *buddy, void *bitmap, ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 	ext4_mb_check_limits(ac, e4b, 0);
 }
 
-static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct ext4_free_extent ex = ac->ac_b_ex;
@@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
 	return 0;
 }
 
-static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
 				struct ext4_buddy *e4b)
 {
 	ext4_group_t group = ac->ac_g_ex.fe_group;
@@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
  * The routine scans buddy structures (not bitmap!) from given order
  * to max order and tries to find big enough chunk to satisfy the req
  */
-static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
  * In order to optimize scanning, caller must pass number of
  * free blocks in the group, so the routine can know upper limit.
  */
-static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
  * we try to find stripe-aligned chunks for stripe-size requests
  * XXX should do so at least for multiples of stripe size as well
  */
-static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 				 struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
 
 }
 
-static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+static noinline_for_stack
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
 
 	int ret;
@@ -3457,7 +3464,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
  * used in in-core bitmap. buddy must be generated from this bitmap
  * Need to be called with ext4 group lock held
  */
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+static noinline_for_stack
+void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-- 
cgit v1.2.3


From 726447d803802cd0be8f62d17c4a34421781b938 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 13 Jul 2009 10:24:17 -0400
Subject: ext4: naturally align struct ext4_allocation_request

As Ted noted, the ext4_allocation_request isn't well aligned.  Looking
at it with pahole we're wasting space on 64-bit arches:

struct ext4_allocation_request {
        struct inode *             inode;              /*     0     8 */
        ext4_lblk_t                logical;            /*     8     4 */

        /* XXX 4 bytes hole, try to pack */

        ext4_fsblk_t               goal;               /*    16     8 */
        ext4_lblk_t                lleft;              /*    24     4 */

        /* XXX 4 bytes hole, try to pack */

        ext4_fsblk_t               pleft;              /*    32     8 */
        ext4_lblk_t                lright;             /*    40     4 */

        /* XXX 4 bytes hole, try to pack */

        ext4_fsblk_t               pright;             /*    48     8 */
        unsigned int               len;                /*    56     4 */
        unsigned int               flags;              /*    60     4 */
        /* --- cacheline 1 boundary (64 bytes) --- */

        /* size: 64, cachelines: 1, members: 9 */
        /* sum members: 52, holes: 3, sum holes: 12 */
};

Grouping 32-bit members together closes these holes and shrinks the
structure by 12 bytes. which is important since ext4 can get on the
hairy edge of stack overruns.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ddf7e55abe1..9714db393efe 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t;
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
 	struct inode *inode;
+	/* how many blocks we want to allocate */
+	unsigned int len;
 	/* logical block in target inode */
 	ext4_lblk_t logical;
-	/* phys. target (a hint) */
-	ext4_fsblk_t goal;
 	/* the closest logical allocated block to the left */
 	ext4_lblk_t lleft;
-	/* phys. block for ^^^ */
-	ext4_fsblk_t pleft;
 	/* the closest logical allocated block to the right */
 	ext4_lblk_t lright;
-	/* phys. block for ^^^ */
+	/* phys. target (a hint) */
+	ext4_fsblk_t goal;
+	/* phys. block for the closest logical allocated block to the left */
+	ext4_fsblk_t pleft;
+	/* phys. block for the closest logical allocated block to the right */
 	ext4_fsblk_t pright;
-	/* how many blocks we want to allocate */
-	unsigned int len;
 	/* flags. see above EXT4_MB_HINT_* */
 	unsigned int flags;
 };
-- 
cgit v1.2.3


From 3e03f9ca6a2599db1823bb0ea24e0845219a0e69 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Sun, 5 Jul 2009 22:29:27 -0400
Subject: ext4: Use rcu_barrier() on module unload.

The ext4 module uses rcu_call() thus it should use rcu_barrier()on
module unload.

The kmem cache ext4_pspace_cachep is sometimes free'ed using
call_rcu() callbacks.  Thus, we must wait for completion of call_rcu()
before doing kmem_cache_destroy().

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4a45efabb203..2fcaf286f1de 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2909,7 +2909,11 @@ int __init init_ext4_mballoc(void)
 
 void exit_ext4_mballoc(void)
 {
-	/* XXX: synchronize_rcu(); */
+	/* 
+	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
+	 * before destroying the slab cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
 	kmem_cache_destroy(ext4_free_ext_cachep);
-- 
cgit v1.2.3


From f91d1d04171026e56c7e343ee3cdcc801dd85cfb Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jul 2009 16:16:20 -0400
Subject: jbd2: Fix a race between checkpointing code and
 journal_get_write_access()

The following race can happen:

 CPU1                          CPU2
                               checkpointing code checks the buffer, adds
                                 it to an array for writeback
 do_get_write_access()
 ...
 lock_buffer()
 unlock_buffer()
                               flush_batch() submits the buffer for IO
 __jbd2_journal_file_buffer()

So a buffer under writeout is returned from
do_get_write_access(). Since the filesystem code relies on the fact
that journaled buffers cannot be written out, it does not take the
buffer lock and so it can modify buffer while it is under
writeout. That can lead to a filesystem corruption if we crash at the
right moment.

We fix the problem by clearing the buffer dirty bit under buffer_lock
even if the buffer is on BJ_None list. Actually, we clear the dirty
bit regardless the list the buffer is in and warn about the fact if
the buffer is already journalled.

Thanks for spotting the problem goes to dingdinghua <dingdinghua85@gmail.com>.

Reported-by: dingdinghua <dingdinghua85@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 68 ++++++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..6213ac728f30 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
 	wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up.  Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible.  #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-	int jlist;
-
-	/* If this buffer is one which might reasonably be dirty
-	 * --- ie. data, or not part of this journal --- then
-	 * we're OK to leave it alone, but otherwise we need to
-	 * move the dirty bit to the journal's own internal
-	 * JBDDirty bit. */
-	jlist = jh->b_jlist;
+	char b[BDEVNAME_SIZE];
 
-	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-	    jlist == BJ_Shadow || jlist == BJ_Forget) {
-		struct buffer_head *bh = jh2bh(jh);
-
-		if (test_clear_buffer_dirty(bh))
-			set_buffer_jbddirty(bh);
-	}
+	printk(KERN_WARNING
+	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+	       "There's a risk of filesystem corruption in case of system "
+	       "crash.\n",
+	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
@@ -593,14 +574,16 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
+			warn_dirty_buffer(bh);
 		}
 		/*
 		 * In any case we need to clean the dirty flag and we must
 		 * do it under the buffer lock to be sure we don't race
 		 * with running write-out.
 		 */
-		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-		jbd_unexpected_dirty_buffer(jh);
+		JBUFFER_TRACE(jh, "Journalling dirty buffer");
+		clear_buffer_dirty(bh);
+		set_buffer_jbddirty(bh);
 	}
 
 	unlock_buffer(bh);
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
+		/*
+		 * Previous jbd2_journal_forget() could have left the buffer
+		 * with jbddirty bit set because it was being committed. When
+		 * the commit finished, we've filed the buffer for
+		 * checkpointing and marked it dirty. Now we are reallocating
+		 * the buffer so the transaction freeing it must have
+		 * committed and so it's safe to clear the dirty bit.
+		 */
+		clear_buffer_dirty(jh2bh(jh));
 		jh->b_transaction = transaction;
 
 		/* first access by this transaction */
@@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		/*
+		 * We don't want to write the buffer anymore, clear the
+		 * bit so that we don't confuse checks in
+		 * __journal_file_buffer
+		 */
+		clear_buffer_dirty(bh);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
-		clear_buffer_jbddirty(bh);
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 	if (jh->b_transaction && jh->b_jlist == jlist)
 		return;
 
-	/* The following list of buffer states needs to be consistent
-	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-	 * state. */
-
 	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		/*
+		 * For metadata buffers, we track dirty bit in buffer_jbddirty
+		 * instead of buffer_dirty. We should not see a dirty bit set
+		 * here because we clear it in do_get_write_access but e.g.
+		 * tune2fs can modify the sb and set the dirty bit at any time
+		 * so we try to gracefully handle that.
+		 */
+		if (buffer_dirty(bh))
+			warn_dirty_buffer(bh);
 		if (test_clear_buffer_dirty(bh) ||
 		    test_clear_buffer_jbddirty(bh))
 			was_dirty = 1;
-- 
cgit v1.2.3


From ffacfa7a79d6c00624196b2d13b0a7f72f2b8227 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jul 2009 16:22:22 -0400
Subject: ext4: Fix truncation of symlinks after failed write

Contents of long symlinks is written via standard write methods. So
when the write fails, we add inode to orphan list. But symlinks don't
have .truncate method defined so nobody properly removes them from the
on disk orphan list.

Fix this by calling ext4_truncate() directly instead of calling
vmtruncate() (which is saner anyway since we don't need anything
vmtruncate() does except from calling .truncate in these paths).  We
also add inode to orphan list only if ext4_can_truncate() is true
(currently, it can be false for symlinks when there are no blocks
allocated) - otherwise orphan list processing will complain and
ext4_truncate() will not remove inode from on-disk orphan list.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9760ba09275e..ff2afc1909b3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1513,14 +1513,14 @@ retry:
 		 * Add inode to orphan list in case we crash before
 		 * truncate finishes
 		 */
-		if (pos + len > inode->i_size)
+		if (pos + len > inode->i_size && ext4_can_truncate(inode))
 			ext4_orphan_add(handle, inode);
 
 		ext4_journal_stop(handle);
 		if (pos + len > inode->i_size) {
-			vmtruncate(inode, inode->i_size);
+			ext4_truncate(inode);
 			/*
-			 * If vmtruncate failed early the inode might
+			 * If truncate failed early the inode might
 			 * still be on the orphan list; we need to
 			 * make sure the inode is removed from the
 			 * orphan list in that case.
@@ -1614,7 +1614,7 @@ static int ext4_ordered_write_end(struct file *file,
 		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
-		if (pos + len > inode->i_size)
+		if (pos + len > inode->i_size && ext4_can_truncate(inode))
 			/* if we have allocated more blocks and copied
 			 * less. We will have blocks allocated outside
 			 * inode->i_size. So truncate them
@@ -1628,9 +1628,9 @@ static int ext4_ordered_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -1655,7 +1655,7 @@ static int ext4_writeback_write_end(struct file *file,
 	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
@@ -1670,9 +1670,9 @@ static int ext4_writeback_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -1722,7 +1722,7 @@ static int ext4_journalled_write_end(struct file *file,
 
 	unlock_page(page);
 	page_cache_release(page);
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
@@ -1733,9 +1733,9 @@ static int ext4_journalled_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -2907,7 +2907,7 @@ retry:
 		 * i_size_read because we hold i_mutex.
 		 */
 		if (pos + len > inode->i_size)
-			vmtruncate(inode, inode->i_size);
+			ext4_truncate(inode);
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-- 
cgit v1.2.3


From 5887e98b609e96ce61ee0528cf94a2bfdc809dd7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sun, 5 Jul 2009 23:12:04 -0400
Subject: ext4: Calculate required journal credits for inserting an extent
 properly

When we have space in the extent tree leaf node we should be able to
insert the extent with much less journal credits. The code was doing
proper calculation but missed a return statement.

Reported-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 50322a09bd01..73ebfb44ad75 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 			 */
 			/* 1 bitmap, 1 block group descriptor */
 			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+			return ret;
 		}
 	}
 
-- 
cgit v1.2.3


From 5adfee9c17314c1411095c23191c3cb0c2d25f9f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 8 Jul 2009 17:11:24 -0400
Subject: ext4: fix no journal corruption with locale-gen

If there is no journal, ext4_should_writeback_data() should return
TRUE.  This will fix ext4_set_aops() to set ext4_da_ops in the case of
delayed allocation; otherwise ext4_journaled_aops gets used by
default, which doesn't handle delayed allocation properly.

The advantage of using ext4_should_writeback_data() approach is that
it should handle nobh better as well.

Thanks to Curt Wohlgemuth for investigating this problem, and Aneesh
Kumar for suggesting this approach.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4_jbd2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426f6805..d574a85aca56 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -281,10 +281,10 @@ static inline int ext4_should_order_data(struct inode *inode)
 
 static inline int ext4_should_writeback_data(struct inode *inode)
 {
-	if (EXT4_JOURNAL(inode) == NULL)
-		return 0;
 	if (!S_ISREG(inode->i_mode))
 		return 0;
+	if (EXT4_JOURNAL(inode) == NULL)
+		return 1;
 	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
 		return 0;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
-- 
cgit v1.2.3


From e6462869e4fd88be5141a356ee0c28d8067340cc Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann@Sun.COM>
Date: Sun, 5 Jul 2009 23:45:11 -0400
Subject: ext4: Fix goal inum check in the inode allocator

The goal inode is specificed by inode number which belongs
to [1; s_inodes_count].

Signed-off-by: Johann Lombardi <johann@sun.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2f645732e3b7..29e6dc7299b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -833,7 +833,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
 	if (!goal)
 		goal = sbi->s_inode_goal;
 
-	if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+	if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
 		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
 		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
 		ret2 = 0;
-- 
cgit v1.2.3


From b767e78a179e5ab30fdbff1686d074ac270471eb Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 4 Jun 2009 08:06:06 -0400
Subject: ext4: Don't look at buffer_heads outside i_size.

Buffer heads outside i_size will be unmapped. So when we
are doing "walk_page_buffers" limit ourself to i_size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Josef Bacik <jbacik@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
----
---
 fs/ext4/inode.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ff2afc1909b3..b87b68cd3241 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2578,7 +2578,7 @@ static int ext4_da_writepage(struct page *page,
 		 * all are mapped and non delay. We don't want to
 		 * do block allocation here.
 		 */
-		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+		ret = block_prepare_write(page, 0, len,
 					  noalloc_get_block_write);
 		if (!ret) {
 			page_bufs = page_buffers(page);
@@ -2600,7 +2600,7 @@ static int ext4_da_writepage(struct page *page,
 			return 0;
 		}
 		/* now mark the buffer_heads as dirty and uptodate */
-		block_commit_write(page, 0, PAGE_CACHE_SIZE);
+		block_commit_write(page, 0, len);
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -3246,6 +3246,8 @@ static int ext4_normal_writepage(struct page *page,
 static int __ext4_journalled_writepage(struct page *page,
 				       struct writeback_control *wbc)
 {
+	loff_t size;
+	unsigned int len;
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
 	struct buffer_head *page_bufs;
@@ -3253,14 +3255,17 @@ static int __ext4_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
-	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-				  noalloc_get_block_write);
+	size = i_size_read(inode);
+	if (page->index == size >> PAGE_CACHE_SHIFT)
+		len = size & ~PAGE_CACHE_MASK;
+	else
+		len = PAGE_CACHE_SIZE;
+	ret = block_prepare_write(page, 0, len, noalloc_get_block_write);
 	if (ret != 0)
 		goto out_unlock;
 
 	page_bufs = page_buffers(page);
-	walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
-								bget_one);
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
 	/* As soon as we unlock the page, it can go away, but we have
 	 * references to buffers so we are safe */
 	unlock_page(page);
@@ -3271,19 +3276,18 @@ static int __ext4_journalled_writepage(struct page *page,
 		goto out;
 	}
 
-	ret = walk_page_buffers(handle, page_bufs, 0,
-			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
+	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				do_journal_get_write_access);
 
-	err = walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, write_end_fn);
+	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				write_end_fn);
 	if (ret == 0)
 		ret = err;
 	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 
-	walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, bput_one);
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
 	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
 	goto out;
 
-- 
cgit v1.2.3


From 8616e0fc1e27295316f9821a883f0e9fa6f8200f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 11 Jun 2009 10:27:28 -0400
Subject: cifs: remove unneeded NULL checks from cifs_show_options

show_options is always called with the namespace_sem held. Therefore we
don't need to worry about the vfsmount being NULL, or it vanishing while
the function is running. By the same token, there's no need to worry
about the superblock, tcon, smb or tcp sessions being NULL on entry.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 130 +++++++++++++++++++++++++------------------------------
 1 file changed, 59 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0d92114195ab..3f121fbd6c56 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -346,80 +346,68 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 	struct TCP_Server_Info *server;
 
 	cifs_sb = CIFS_SB(m->mnt_sb);
+	tcon = cifs_sb->tcon;
 
-	if (cifs_sb) {
-		tcon = cifs_sb->tcon;
-		if (tcon) {
-			seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
-			if (tcon->ses) {
-				if (tcon->ses->userName)
-					seq_printf(s, ",username=%s",
-					   tcon->ses->userName);
-				if (tcon->ses->domainName)
-					seq_printf(s, ",domain=%s",
-					   tcon->ses->domainName);
-				server = tcon->ses->server;
-				if (server) {
-					seq_printf(s, ",addr=");
-					switch (server->addr.sockAddr6.
-						sin6_family) {
-					case AF_INET6:
-						seq_printf(s, "%pI6",
-							   &server->addr.sockAddr6.sin6_addr);
-						break;
-					case AF_INET:
-						seq_printf(s, "%pI4",
-							   &server->addr.sockAddr.sin_addr.s_addr);
-						break;
-					}
-				}
-			}
-			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
-			   !(tcon->unix_ext))
-				seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
-			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
-			   !(tcon->unix_ext))
-				seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
-			if (!tcon->unix_ext) {
-				seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
+	seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
+	if (tcon->ses->userName)
+		seq_printf(s, ",username=%s", tcon->ses->userName);
+	if (tcon->ses->domainName)
+		seq_printf(s, ",domain=%s", tcon->ses->domainName);
+
+	cifs_show_address(s, tcon->ses->server);
+
+	seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+	seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
+
+	server = tcon->ses->server;
+	seq_printf(s, ",addr=");
+	switch (server->addr.sockAddr6.sin6_family) {
+	case AF_INET6:
+		seq_printf(s, "%pI6", &server->addr.sockAddr6.sin6_addr);
+		break;
+	case AF_INET:
+		seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
+		break;
+	}
+
+	if (!tcon->unix_ext)
+		seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
 					   cifs_sb->mnt_file_mode,
 					   cifs_sb->mnt_dir_mode);
-			}
-			if (tcon->seal)
-				seq_printf(s, ",seal");
-			if (tcon->nocase)
-				seq_printf(s, ",nocase");
-			if (tcon->retry)
-				seq_printf(s, ",hard");
-		}
-		if (cifs_sb->prepath)
-			seq_printf(s, ",prepath=%s", cifs_sb->prepath);
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
-			seq_printf(s, ",posixpaths");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
-			seq_printf(s, ",setuids");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
-			seq_printf(s, ",serverino");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
-			seq_printf(s, ",directio");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
-			seq_printf(s, ",nouser_xattr");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
-			seq_printf(s, ",mapchars");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
-			seq_printf(s, ",sfu");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			seq_printf(s, ",nobrl");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-			seq_printf(s, ",cifsacl");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
-			seq_printf(s, ",dynperm");
-		if (m->mnt_sb->s_flags & MS_POSIXACL)
-			seq_printf(s, ",acl");
-
-		seq_printf(s, ",rsize=%d", cifs_sb->rsize);
-		seq_printf(s, ",wsize=%d", cifs_sb->wsize);
-	}
+	if (tcon->seal)
+		seq_printf(s, ",seal");
+	if (tcon->nocase)
+		seq_printf(s, ",nocase");
+	if (tcon->retry)
+		seq_printf(s, ",hard");
+	if (cifs_sb->prepath)
+		seq_printf(s, ",prepath=%s", cifs_sb->prepath);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+		seq_printf(s, ",posixpaths");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
+		seq_printf(s, ",setuids");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+		seq_printf(s, ",serverino");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
+		seq_printf(s, ",directio");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+		seq_printf(s, ",nouser_xattr");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
+		seq_printf(s, ",mapchars");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+		seq_printf(s, ",sfu");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+		seq_printf(s, ",nobrl");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
+		seq_printf(s, ",cifsacl");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+		seq_printf(s, ",dynperm");
+	if (m->mnt_sb->s_flags & MS_POSIXACL)
+		seq_printf(s, ",acl");
+
+	seq_printf(s, ",rsize=%d", cifs_sb->rsize);
+	seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 340481a36498bf3fe404bcecb2e2d6188e950bff Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 11 Jun 2009 10:27:29 -0400
Subject: cifs: have cifs_show_options show forceuid/forcegid options

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3f121fbd6c56..8b315708cb3f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -357,7 +357,12 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 	cifs_show_address(s, tcon->ses->server);
 
 	seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+		seq_printf(s, ",forceuid");
+
 	seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+		seq_printf(s, ",forcegid");
 
 	server = tcon->ses->server;
 	seq_printf(s, ",addr=");
-- 
cgit v1.2.3


From 1e68b2b2756fc3488ecbade5ad5f13302b3aaafc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 11 Jun 2009 10:27:30 -0400
Subject: cifs: add new routine for converting AF_INET and AF_INET6 addrs

...to consolidate some logic used in more than one place.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h   |  2 +-
 fs/cifs/connect.c     | 21 ++++-----------------
 fs/cifs/dns_resolve.c | 21 +++------------------
 fs/cifs/netmisc.c     | 34 ++++++++++++++++++++++++++++++----
 4 files changed, 38 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f9452329bcce..c419416a42ee 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
 extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			enum securityEnum *secType);
-extern int cifs_inet_pton(const int, const char *source, void *dst);
+extern int cifs_convert_address(char *src, void *dst);
 extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 97f4311b9a8e..c368ad658236 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 
 	memset(&addr, 0, sizeof(struct sockaddr_storage));
 
-	if (volume_info->UNCip && volume_info->UNC) {
-		rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
-				    &sin_server->sin_addr.s_addr);
-
-		if (rc <= 0) {
-			/* not ipv4 address, try ipv6 */
-			rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
-					    &sin_server6->sin6_addr.in6_u);
-			if (rc > 0)
-				addr.ss_family = AF_INET6;
-		} else {
-			addr.ss_family = AF_INET;
-		}
+	cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip));
 
-		if (rc <= 0) {
+	if (volume_info->UNCip && volume_info->UNC) {
+		rc = cifs_convert_address(volume_info->UNCip, &addr);
+		if (!rc) {
 			/* we failed translating address */
 			rc = -EINVAL;
 			goto out_err;
 		}
-
-		cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
-			 volume_info->UNCip));
 	} else if (volume_info->UNCip) {
 		/* BB using ip addr as tcp_ses name to connect to the
 		   DFS root below */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index df4a306f697e..91b5500755bf 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -37,24 +37,9 @@
 static int
 is_ip(const char *name)
 {
-	int rc;
-	struct sockaddr_in sin_server;
-	struct sockaddr_in6 sin_server6;
-
-	rc = cifs_inet_pton(AF_INET, name,
-			&sin_server.sin_addr.s_addr);
-
-	if (rc <= 0) {
-		/* not ipv4 address, try ipv6 */
-		rc = cifs_inet_pton(AF_INET6, name,
-				&sin_server6.sin6_addr.in6_u);
-		if (rc > 0)
-			return 1;
-	} else {
-		return 1;
-	}
-	/* we failed translating address */
-	return 0;
+	struct sockaddr_storage ss;
+
+	return cifs_convert_address(name, &ss);
 }
 
 static int
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 32d6baa0a54f..00e6e357ae88 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
 	{0, 0}
 };
 
-/* Convert string containing dotted ip address to binary form */
-/* returns 0 if invalid address */
-
-int
+/*
+ * Convert a string containing text IPv4 or IPv6 address to binary form.
+ *
+ * Returns 0 on failure.
+ */
+static int
 cifs_inet_pton(const int address_family, const char *cp, void *dst)
 {
 	int ret = 0;
@@ -153,6 +155,30 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
 	return ret;
 }
 
+/*
+ * Try to convert a string to an IPv4 address and then attempt to convert
+ * it to an IPv6 address if that fails. Set the family field if either
+ * succeeds.
+ *
+ * Returns 0 on failure.
+ */
+int
+cifs_convert_address(char *src, void *dst)
+{
+	struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
+	struct sockaddr_in6 *s6 = (Struct sockaddr_in6 *) dst;
+
+	if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
+		s4->sin_family = AF_INET;
+		return 1;
+	} else if (cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr)) {
+		s6->sin6_family = AF_INET6;
+		return 1;
+	}
+
+	return 0;
+}
+
 /*****************************************************************************
 convert a NT status code to a dos class/code
  *****************************************************************************/
-- 
cgit v1.2.3


From c364b22c9580a885e0f8c0d0f9710d67dc448958 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sun, 14 Jun 2009 17:57:10 -0400
Subject: ext4: Fix mmap/truncate race when blocksize < pagesize && delayed
 allocation

It is possible to see buffer_heads which are not mapped in the
writepage callback in the following scneario (where the fs blocksize
is 1k and the page size is 4k):

1) truncate(f, 1024)
2) mmap(f, 0, 4096)
3) a[0] = 'a'
4) truncate(f, 4096)
5) writepage(...)

Now if we get a writepage callback immediately after (4) and before an
attempt to write at any other offset via mmap address (which implies we
are yet to get a pagefault and do a get_block) what we would have is the
page which is dirty have first block allocated and the other three
buffer_heads unmapped.

In the above case the writepage should go ahead and try to write the
first blocks and clear the page_dirty flag. Further attempts to write
to the page will again create a fault and result in allocating blocks
and marking page dirty.  If we don't write any other offset via mmap
address we would still have written the first block to the disk and
rest of the space will be considered as a hole.

So to address this, we change all of the places where we look for
delayed, unmapped, or unwritten buffer heads, and only check for
delayed or unwritten buffer heads instead.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b87b68cd3241..1275f34589c7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2305,15 +2305,9 @@ flush_it:
 	return;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 {
-	/*
-	 * unmapped buffer is possible for holes.
-	 * delay buffer is possible with delayed allocation.
-	 * We also need to consider unwritten buffer as unmapped.
-	 */
-	return (!buffer_mapped(bh) || buffer_delay(bh) ||
-				buffer_unwritten(bh)) && buffer_dirty(bh);
+	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
 
 /*
@@ -2400,7 +2394,7 @@ static int __mpage_da_writepage(struct page *page,
 			 * Otherwise we won't make progress
 			 * with the page in ext4_da_writepage
 			 */
-			if (ext4_bh_unmapped_or_delay(NULL, bh)) {
+			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
 						       bh->b_size,
 						       bh->b_state);
@@ -2517,7 +2511,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 	 * so call get_block_wrap with create = 0
 	 */
 	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-	BUG_ON(create && ret == 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -2533,7 +2526,7 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
  *   - grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
-				struct writeback_control *wbc)
+			     struct writeback_control *wbc)
 {
 	int ret = 0;
 	loff_t size;
@@ -2551,7 +2544,7 @@ static int ext4_da_writepage(struct page *page,
 	if (page_has_buffers(page)) {
 		page_bufs = page_buffers(page);
 		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-					ext4_bh_unmapped_or_delay)) {
+					ext4_bh_delay_or_unwritten)) {
 			/*
 			 * We don't want to do  block allocation
 			 * So redirty the page and return
@@ -2584,7 +2577,7 @@ static int ext4_da_writepage(struct page *page,
 			page_bufs = page_buffers(page);
 			/* check whether all are mapped and non delay */
 			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-						ext4_bh_unmapped_or_delay)) {
+						ext4_bh_delay_or_unwritten)) {
 				redirty_page_for_writepage(wbc, page);
 				unlock_page(page);
 				return 0;
@@ -3232,7 +3225,7 @@ static int ext4_normal_writepage(struct page *page,
 		 * happily proceed with mapping them and writing the page.
 		 */
 		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_unmapped_or_delay));
+					ext4_bh_delay_or_unwritten));
 	}
 
 	if (!ext4_journal_current_handle())
@@ -3322,7 +3315,7 @@ static int ext4_journalled_writepage(struct page *page,
 		 * happily proceed with mapping them and writing the page.
 		 */
 		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_unmapped_or_delay));
+					ext4_bh_delay_or_unwritten));
 	}
 
 	if (ext4_journal_current_handle())
-- 
cgit v1.2.3


From 43ce1d23b43330634507a049b55c36e91d27282e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sun, 14 Jun 2009 17:58:45 -0400
Subject: ext4: Fix mmap/truncate race when blocksize < pagesize && !nodellaoc

This patch fixes the mmap/truncate race that was fixed for delayed
allocation by merging ext4_{journalled,normal,da}_writepage() into
ext4_writepage().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 234 ++++++++++++++------------------------------------------
 1 file changed, 57 insertions(+), 177 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1275f34589c7..97c48b5b0578 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,6 +47,10 @@
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
+static int __ext4_journalled_writepage(struct page *page,
+				       struct writeback_control *wbc,
+				       unsigned int len);
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
@@ -2392,7 +2396,7 @@ static int __mpage_da_writepage(struct page *page,
 			 * We need to try to allocate
 			 * unmapped blocks in the same page.
 			 * Otherwise we won't make progress
-			 * with the page in ext4_da_writepage
+			 * with the page in ext4_writepage
 			 */
 			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
@@ -2519,13 +2523,47 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 }
 
 /*
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We even don't
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), noone guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
+ *
  * This function can get called via...
  *   - ext4_da_writepages after taking page lock (have journal handle)
  *   - journal_submit_inode_data_buffers (no journal handle)
  *   - shrink_page_list via pdflush (no journal handle)
  *   - grab_page_cache when doing write_begin (have journal handle)
+ *
+ * We don't do any block allocation in this function. If we have page with
+ * multiple blocks we need to write those buffer_heads that are mapped. This
+ * is important for mmaped based write. So if we do with blocksize 1K
+ * truncate(f, 1024);
+ * a = mmap(f, 0, 4096);
+ * a[0] = 'a';
+ * truncate(f, 4096);
+ * we have in the page first buffer_head mapped via page_mkwrite call back
+ * but other bufer_heads would be unmapped but dirty(dirty done via the
+ * do_wp_page). So writepage should write the first block. If we modify
+ * the mmap area beyond 1024 we will again get a page_fault and the
+ * page_mkwrite callback will do the block allocation and mark the
+ * buffer_heads mapped.
+ *
+ * We redirty the page if we have any buffer_heads that is either delay or
+ * unwritten in the page.
+ *
+ * We can get recursively called as show below.
+ *
+ *	ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ *		ext4_writepage()
+ *
+ * But since we don't do any block allocation we should not deadlock.
+ * Page also have the dirty flag cleared so we don't get recurive page_lock.
  */
-static int ext4_da_writepage(struct page *page,
+static int ext4_writepage(struct page *page,
 			     struct writeback_control *wbc)
 {
 	int ret = 0;
@@ -2534,7 +2572,7 @@ static int ext4_da_writepage(struct page *page,
 	struct buffer_head *page_bufs;
 	struct inode *inode = page->mapping->host;
 
-	trace_ext4_da_writepage(inode, page);
+	trace_ext4_writepage(inode, page);
 	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -2596,6 +2634,15 @@ static int ext4_da_writepage(struct page *page,
 		block_commit_write(page, 0, len);
 	}
 
+	if (PageChecked(page) && ext4_should_journal_data(inode)) {
+		/*
+		 * It's mmapped pagecache.  Add buffers and journal it.  There
+		 * doesn't seem much point in redirtying the page here.
+		 */
+		ClearPageChecked(page);
+		return __ext4_journalled_writepage(page, wbc, len);
+	}
+
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
@@ -3135,112 +3182,10 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-/*
- * Note that we don't need to start a transaction unless we're journaling data
- * because we should have holes filled from ext4_page_mkwrite(). We even don't
- * need to file the inode to the transaction's list in ordered mode because if
- * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
- * transaction the data will hit the disk. In case we are journaling data, we
- * cannot start transaction directly because transaction start ranks above page
- * lock so we have to do some magic.
- *
- * In all journaling modes block_write_full_page() will start the I/O.
- *
- * Problem:
- *
- *	ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- *		ext4_writepage()
- *
- * Similar for:
- *
- *	ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext4_get_block().  We will deadlock on various things like
- * lock_journal and i_data_sem
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- *	    non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- *   In journalled data mode, a data buffer may be metadata against the
- *   current transaction.  But the same file is part of a shared mapping
- *   and someone does a writepage() on it.
- *
- *   We will move the buffer onto the async_data list, but *after* it has
- *   been dirtied. So there's a small window where we have dirty data on
- *   BJ_Metadata.
- *
- *   Note that this only applies to the last partial page in the file.  The
- *   bit which block_write_full_page() uses prepare/commit for.  (That's
- *   broken code anyway: it's wrong for msync()).
- *
- *   It's a rare case: affects the final partial page, for journalled data
- *   where the file is subject to bith write() and writepage() in the same
- *   transction.  To fix it we'll need a custom block_write_full_page().
- *   We'll probably need that anyway for journalling writepage() output.
- *
- * We don't honour synchronous mounts for writepage().  That would be
- * disastrous.  Any write() or metadata operation will sync the fs for
- * us.
- *
- */
-static int __ext4_normal_writepage(struct page *page,
-				   struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-
-	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page, noalloc_get_block_write, wbc);
-	else
-		return block_write_full_page(page, noalloc_get_block_write,
-					     wbc);
-}
-
-static int ext4_normal_writepage(struct page *page,
-				 struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	loff_t size = i_size_read(inode);
-	loff_t len;
-
-	trace_ext4_normal_writepage(inode, page);
-	J_ASSERT(PageLocked(page));
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
-	else
-		len = PAGE_CACHE_SIZE;
-
-	if (page_has_buffers(page)) {
-		/* if page has buffers it should all be mapped
-		 * and allocated. If there are not buffers attached
-		 * to the page we know the page is dirty but it lost
-		 * buffers. That means that at some moment in time
-		 * after write_begin() / write_end() has been called
-		 * all buffers have been clean and thus they must have been
-		 * written at least once. So they are all mapped and we can
-		 * happily proceed with mapping them and writing the page.
-		 */
-		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_delay_or_unwritten));
-	}
-
-	if (!ext4_journal_current_handle())
-		return __ext4_normal_writepage(page, wbc);
-
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
 static int __ext4_journalled_writepage(struct page *page,
-				       struct writeback_control *wbc)
+				       struct writeback_control *wbc,
+				       unsigned int len)
 {
-	loff_t size;
-	unsigned int len;
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
 	struct buffer_head *page_bufs;
@@ -3248,16 +3193,8 @@ static int __ext4_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
-	size = i_size_read(inode);
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
-	else
-		len = PAGE_CACHE_SIZE;
-	ret = block_prepare_write(page, 0, len, noalloc_get_block_write);
-	if (ret != 0)
-		goto out_unlock;
-
 	page_bufs = page_buffers(page);
+	BUG_ON(!page_bufs);
 	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
 	/* As soon as we unlock the page, it can go away, but we have
 	 * references to buffers so we are safe */
@@ -3282,67 +3219,10 @@ static int __ext4_journalled_writepage(struct page *page,
 
 	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
 	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-	goto out;
-
-out_unlock:
-	unlock_page(page);
 out:
 	return ret;
 }
 
-static int ext4_journalled_writepage(struct page *page,
-				     struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	loff_t size = i_size_read(inode);
-	loff_t len;
-
-	trace_ext4_journalled_writepage(inode, page);
-	J_ASSERT(PageLocked(page));
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
-	else
-		len = PAGE_CACHE_SIZE;
-
-	if (page_has_buffers(page)) {
-		/* if page has buffers it should all be mapped
-		 * and allocated. If there are not buffers attached
-		 * to the page we know the page is dirty but it lost
-		 * buffers. That means that at some moment in time
-		 * after write_begin() / write_end() has been called
-		 * all buffers have been clean and thus they must have been
-		 * written at least once. So they are all mapped and we can
-		 * happily proceed with mapping them and writing the page.
-		 */
-		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_delay_or_unwritten));
-	}
-
-	if (ext4_journal_current_handle())
-		goto no_write;
-
-	if (PageChecked(page)) {
-		/*
-		 * It's mmapped pagecache.  Add buffers and journal it.  There
-		 * doesn't seem much point in redirtying the page here.
-		 */
-		ClearPageChecked(page);
-		return __ext4_journalled_writepage(page, wbc);
-	} else {
-		/*
-		 * It may be a page full of checkpoint-mode buffers.  We don't
-		 * really know unless we go poke around in the buffer_heads.
-		 * But block_write_full_page will do the right thing.
-		 */
-		return block_write_full_page(page, noalloc_get_block_write,
-					     wbc);
-	}
-no_write:
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
 static int ext4_readpage(struct file *file, struct page *page)
 {
 	return mpage_readpage(page, ext4_get_block);
@@ -3489,7 +3369,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 static const struct address_space_operations ext4_ordered_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_normal_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_ordered_write_end,
@@ -3504,7 +3384,7 @@ static const struct address_space_operations ext4_ordered_aops = {
 static const struct address_space_operations ext4_writeback_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_normal_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_writeback_write_end,
@@ -3519,7 +3399,7 @@ static const struct address_space_operations ext4_writeback_aops = {
 static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_journalled_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_journalled_write_end,
@@ -3533,7 +3413,7 @@ static const struct address_space_operations ext4_journalled_aops = {
 static const struct address_space_operations ext4_da_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_da_writepage,
+	.writepage		= ext4_writepage,
 	.writepages		= ext4_da_writepages,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_da_write_begin,
-- 
cgit v1.2.3


From 62e086be5d2abef8cad854bc5707329ad345f2ec Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sun, 14 Jun 2009 17:59:34 -0400
Subject: ext4: Move __ext4_journalled_writepage() to avoid forward declaration

In addition, fix two unused variable warnings.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 112 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 54 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 97c48b5b0578..c98e3afea30a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,10 +47,6 @@
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
-static int __ext4_journalled_writepage(struct page *page,
-				       struct writeback_control *wbc,
-				       unsigned int len);
-
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
@@ -2522,6 +2518,59 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 	return ret;
 }
 
+static int bget_one(handle_t *handle, struct buffer_head *bh)
+{
+	get_bh(bh);
+	return 0;
+}
+
+static int bput_one(handle_t *handle, struct buffer_head *bh)
+{
+	put_bh(bh);
+	return 0;
+}
+
+static int __ext4_journalled_writepage(struct page *page,
+				       struct writeback_control *wbc,
+				       unsigned int len)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct buffer_head *page_bufs;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	page_bufs = page_buffers(page);
+	BUG_ON(!page_bufs);
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
+	/* As soon as we unlock the page, it can go away, but we have
+	 * references to buffers so we are safe */
+	unlock_page(page);
+
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				do_journal_get_write_access);
+
+	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				write_end_fn);
+	if (ret == 0)
+		ret = err;
+	err = ext4_journal_stop(handle);
+	if (!ret)
+		ret = err;
+
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
+	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+out:
+	return ret;
+}
+
 /*
  * Note that we don't need to start a transaction unless we're journaling data
  * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2564,7 +2613,7 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
  * Page also have the dirty flag cleared so we don't get recurive page_lock.
  */
 static int ext4_writepage(struct page *page,
-			     struct writeback_control *wbc)
+			  struct writeback_control *wbc)
 {
 	int ret = 0;
 	loff_t size;
@@ -3170,59 +3219,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, ext4_get_block);
 }
 
-static int bget_one(handle_t *handle, struct buffer_head *bh)
-{
-	get_bh(bh);
-	return 0;
-}
-
-static int bput_one(handle_t *handle, struct buffer_head *bh)
-{
-	put_bh(bh);
-	return 0;
-}
-
-static int __ext4_journalled_writepage(struct page *page,
-				       struct writeback_control *wbc,
-				       unsigned int len)
-{
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = mapping->host;
-	struct buffer_head *page_bufs;
-	handle_t *handle = NULL;
-	int ret = 0;
-	int err;
-
-	page_bufs = page_buffers(page);
-	BUG_ON(!page_bufs);
-	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
-	/* As soon as we unlock the page, it can go away, but we have
-	 * references to buffers so we are safe */
-	unlock_page(page);
-
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
-
-	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
-				do_journal_get_write_access);
-
-	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
-				write_end_fn);
-	if (ret == 0)
-		ret = err;
-	err = ext4_journal_stop(handle);
-	if (!ret)
-		ret = err;
-
-	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
-	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-out:
-	return ret;
-}
-
 static int ext4_readpage(struct file *file, struct page *page)
 {
 	return mpage_readpage(page, ext4_get_block);
-- 
cgit v1.2.3


From 61f98ffd74254a95871168bd5a6646b4f3002e31 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 11 Jun 2009 10:27:32 -0400
Subject: cifs: display scopeid in /proc/mounts

Move address display into a new function and display the scopeid as part
of the address in /proc/mounts.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b315708cb3f..ddef913ff3e6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -333,6 +333,27 @@ cifs_destroy_inode(struct inode *inode)
 	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
 }
 
+static void
+cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
+{
+	seq_printf(s, ",addr=");
+
+	switch (server->addr.sockAddr.sin_family) {
+	case AF_INET:
+		seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
+		break;
+	case AF_INET6:
+		seq_printf(s, "%pI6",
+			   &server->addr.sockAddr6.sin6_addr.s6_addr);
+		if (server->addr.sockAddr6.sin6_scope_id)
+			seq_printf(s, "%%%u",
+				   server->addr.sockAddr6.sin6_scope_id);
+		break;
+	default:
+		seq_printf(s, "(unknown)");
+	}
+}
+
 /*
  * cifs_show_options() is for displaying mount options in /proc/mounts.
  * Not all settable options are displayed but most of the important
@@ -343,7 +364,6 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 {
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
-	struct TCP_Server_Info *server;
 
 	cifs_sb = CIFS_SB(m->mnt_sb);
 	tcon = cifs_sb->tcon;
@@ -364,16 +384,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
 		seq_printf(s, ",forcegid");
 
-	server = tcon->ses->server;
-	seq_printf(s, ",addr=");
-	switch (server->addr.sockAddr6.sin6_family) {
-	case AF_INET6:
-		seq_printf(s, "%pI6", &server->addr.sockAddr6.sin6_addr);
-		break;
-	case AF_INET:
-		seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
-		break;
-	}
+	cifs_show_address(s, tcon->ses->server);
 
 	if (!tcon->unix_ext)
 		seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
-- 
cgit v1.2.3


From 361ea1ae5451040cd254eee0b6df64581080b2cc Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 15 Jun 2009 13:46:12 +0000
Subject: [CIFS] Fix build break

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/netmisc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 00e6e357ae88..f9a54da97d34 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -166,7 +166,7 @@ int
 cifs_convert_address(char *src, void *dst)
 {
 	struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
-	struct sockaddr_in6 *s6 = (Struct sockaddr_in6 *) dst;
+	struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
 
 	if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
 		s4->sin_family = AF_INET;
-- 
cgit v1.2.3


From a566a6b11c86147fe9fc9db7ab15f9eecca3e862 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 15 Jun 2009 08:26:48 +0100
Subject: dlm: Fix uninitialised variable warning in lock.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  CC [M]  fs/dlm/lock.o
fs/dlm/lock.c: In function ‘find_rsb’:
fs/dlm/lock.c:438: warning: ‘r’ may be used uninitialized in this function

Since r is used on the error path to set r_ret, set it to NULL.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 205ec95b347e..eb507c453c5f 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -435,7 +435,7 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
 static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
 		    unsigned int flags, struct dlm_rsb **r_ret)
 {
-	struct dlm_rsb *r, *tmp;
+	struct dlm_rsb *r = NULL, *tmp;
 	uint32_t hash, bucket;
 	int error = -EINVAL;
 
-- 
cgit v1.2.3


From 24a5d59f3477bcff4c069ff4d0ca9a3e037d0235 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 Jun 2009 12:33:16 +0200
Subject: udf: Use device size when drive reported bogus number of written
 blocks

Some drives report 0 as the number of written blocks when there are some blocks
recorded. Use device size in such case so that we can automagically mount such
media.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/lowlevel.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 703843f30ffd..1b88fd5df05d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
 	struct block_device *bdev = sb->s_bdev;
 	unsigned long lblock = 0;
 
-	if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock))
+	/*
+	 * ioctl failed or returned obviously bogus value?
+	 * Try using the device size...
+	 */
+	if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
+	    lblock == 0)
 		lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 
 	if (lblock)
-- 
cgit v1.2.3


From c78a87d0a1fc885dfdbe21fd5e07787691dfb068 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 18 Jun 2009 13:20:24 -0500
Subject: dlm: fix plock use-after-free

Fix a regression from the original addition of nfs lock support
586759f03e2e9031ac5589912a51a909ed53c30a.  When a synchronous
(non-nfs) plock completes, the waiting thread will wake up and
free the op struct.  This races with the user thread in
dev_write() which goes on to read the op's callback field to
check if the lock is async and needs a callback.  This check
can happen on the freed op.  The fix is to note the callback
value before the op can be freed.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/plock.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 894a32d438d5..16f682e26c07 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -353,7 +353,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
 {
 	struct dlm_plock_info info;
 	struct plock_op *op;
-	int found = 0;
+	int found = 0, do_callback = 0;
 
 	if (count != sizeof(info))
 		return -EINVAL;
@@ -366,21 +366,24 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
 
 	spin_lock(&ops_lock);
 	list_for_each_entry(op, &recv_list, list) {
-		if (op->info.fsid == info.fsid && op->info.number == info.number &&
+		if (op->info.fsid == info.fsid &&
+		    op->info.number == info.number &&
 		    op->info.owner == info.owner) {
+			struct plock_xop *xop = (struct plock_xop *)op;
 			list_del_init(&op->list);
-			found = 1;
-			op->done = 1;
 			memcpy(&op->info, &info, sizeof(info));
+			if (xop->callback)
+				do_callback = 1;
+			else
+				op->done = 1;
+			found = 1;
 			break;
 		}
 	}
 	spin_unlock(&ops_lock);
 
 	if (found) {
-		struct plock_xop *xop;
-		xop = (struct plock_xop *)op;
-		if (xop->callback)
+		if (do_callback)
 			dlm_plock_callback(op);
 		else
 			wake_up(&recv_wq);
-- 
cgit v1.2.3


From b76a3f93d01fc93a87cb6eba4e854ffe378b4bac Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Mon, 8 Jun 2009 19:28:41 +0300
Subject: exofs: Fix bio leak in error handling path (sync read)

When failing a read request in the sync path, called from
write_begin, I forgot to free the allocated bio, fix it.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/inode.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 77d0a295eb1c..bb5d6ed0f7a8 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -295,6 +295,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 err:
 	if (!is_sync)
 		_unlock_pcol_pages(pcol, ret, READ);
+	else /* Pages unlocked by caller in sync mode only free bio */
+		pcol_free(pcol);
+
 	kfree(pcol_copy);
 	if (or)
 		osd_end_request(or);
-- 
cgit v1.2.3


From 27d2e1491985e95c486d991302e399f5c584b4eb Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 14 Jun 2009 17:23:09 +0300
Subject: exofs: Remove IBM copyrights

Boaz,
Congrats on getting all the OSD stuff into 2.6.30!
I just pulled the git, and saw that the IBM copyrights are still there.
Please remove them from all files:
 * Copyright (C) 2005, 2006
 * International Business Machines

IBM has revoked all rights on the code - they gave it to me.

Thanks!
Avishay

Signed-off-by: Avishay Traeger <avishay@gmail.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/common.h  | 4 +---
 fs/exofs/dir.c     | 4 +---
 fs/exofs/exofs.h   | 4 +---
 fs/exofs/file.c    | 4 +---
 fs/exofs/inode.c   | 4 +---
 fs/exofs/namei.c   | 4 +---
 fs/exofs/osd.c     | 4 +---
 fs/exofs/super.c   | 4 +---
 fs/exofs/symlink.c | 4 +---
 9 files changed, 9 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 24667eedc023..c6718e4817fe 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -2,9 +2,7 @@
  * common.h - Common definitions for both Kernel and user-mode utilities
  *
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 65b0c8c776a1..4cfab1cc75c0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 0fd4c7859679..c413b74ecf31 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 6ed7fe484752..c6810038d637 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index bb5d6ed0f7a8..6c10f7476699 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 77fdd765e76d..b7dd0c236863 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b3d2ccb87aaa..4372542df284 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8216c5b77b53..e47b38e55a26 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 36e2d7bc7f7b..4dd687c3e747 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -1,8 +1,6 @@
 /*
  * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
- * Copyright (C) 2005, 2006
- * International Business Machines
+ * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
  * Boaz Harrosh <bharrosh@panasas.com>
  *
-- 
cgit v1.2.3


From baaf94cdc7fe1c61e3c660a3b055724fd9d0a034 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 14 Jun 2009 16:52:10 +0300
Subject: exofs: Avoid using file_fsync()

The use of file_fsync() in exofs_file_sync() is not necessary since it
does some extra stuff not used by exofs. Open code just the parts that
are currently needed.

TODO: Farther optimization can be done to sync the sb only on inode
update of new files, Usually the sb update is not needed in exofs.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/exofs.h |  3 +++
 fs/exofs/file.c  | 17 ++++++++++++-----
 fs/exofs/super.c |  2 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c413b74ecf31..5ec72e020b22 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -154,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child);
 int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
 		    struct inode *);
 
+/* super.c               */
+int exofs_sync_fs(struct super_block *sb, int wait);
+
 /*********************
  * operation vectors *
  *********************/
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index c6810038d637..839b9dc1e70f 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
 {
 	int ret;
 	struct address_space *mapping = filp->f_mapping;
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb;
 
 	ret = filemap_write_and_wait(mapping);
 	if (ret)
 		return ret;
 
-	/*Note: file_fsync below also calles sync_blockdev, which is a no-op
-	 *      for exofs, but other then that it does sync_inode and
-	 *      sync_superblock which is what we need here.
-	 */
-	return file_fsync(filp, dentry, datasync);
+	/* sync the inode attributes */
+	ret = write_inode_now(inode, 1);
+
+	/* This is a good place to write the sb */
+	/* TODO: Sechedule an sb-sync on create */
+	sb = inode->i_sb;
+	if (sb->s_dirt)
+		exofs_sync_fs(sb, 1);
+
+	return ret;
 }
 
 static int exofs_flush(struct file *file, fl_owner_t id)
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index e47b38e55a26..a343b4ea62f6 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -198,7 +198,7 @@ static const struct export_operations exofs_export_ops;
 /*
  * Write the superblock to the OSD
  */
-static int exofs_sync_fs(struct super_block *sb, int wait)
+int exofs_sync_fs(struct super_block *sb, int wait)
 {
 	struct exofs_sb_info *sbi;
 	struct exofs_fscb *fscb;
-- 
cgit v1.2.3


From 1c520dfbf391e1617ef61553f815b8006a066c44 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 19 Jun 2009 15:14:13 -0700
Subject: ocfs2: Provide the ocfs2_dlm_lvb_valid() stack API.

The Lock Value Block (LVB) of a DLM lock can be lost when nodes die and
the DLM cannot reconstruct its state.  Clients of the DLM need to know
this.

ocfs2's internal DLM, o2dlm, explicitly zeroes out the LVB when it loses
track of the state.  This is not a standard behavior, but ocfs2 has
always relied on it.  Thus, an o2dlm LVB is always "valid".

ocfs2 now supports both o2dlm and fs/dlm via the stack glue.  When
fs/dlm loses track of an LVBs state, it sets a flag
(DLM_SBF_VALNOTVALID) on the Lock Status Block (LKSB).  The contents of
the LVB may be garbage or merely stale.

ocfs2 doesn't want to try to guess at the validity of the stale LVB.
Instead, it should be checking the VALNOTVALID flag.  As this is the
'standard' way of treating LVBs, we will promote this behavior.

We add a stack glue API ocfs2_dlm_lvb_valid().  It returns non-zero when
the LVB is valid.  o2dlm will always return valid, while fs/dlm will
check VALNOTVALID.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/dlmglue.c    |  9 ++++++---
 fs/ocfs2/stack_o2cb.c | 11 +++++++++++
 fs/ocfs2/stack_user.c |  8 ++++++++
 fs/ocfs2/stackglue.c  | 13 +++++++------
 fs/ocfs2/stackglue.h  |  6 ++++++
 5 files changed, 38 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cdeaa76f27f..83d2ddb27186 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1989,7 +1989,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
 {
 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
-	if (lvb->lvb_version == OCFS2_LVB_VERSION
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
+	    && lvb->lvb_version == OCFS2_LVB_VERSION
 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
 		return 1;
 	return 0;
@@ -2382,7 +2383,8 @@ int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
 		return status;
 
 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-	if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
 	return status;
 }
@@ -3627,7 +3629,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
 	struct ocfs2_global_disk_dqinfo *gdinfo;
 	int status = 0;
 
-	if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index fcd120f1493a..3f661376a2de 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 	return dlm_status_to_errno(lksb->lksb_o2dlm.status);
 }
 
+/*
+ * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB
+ * contents, it will zero out the LVB.  Thus the caller can always trust
+ * the contents.
+ */
+static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	return 1;
+}
+
 static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
 	.dlm_lock	= o2cb_dlm_lock,
 	.dlm_unlock	= o2cb_dlm_unlock,
 	.lock_status	= o2cb_dlm_lock_status,
+	.lvb_valid	= o2cb_dlm_lvb_valid,
 	.lock_lvb	= o2cb_dlm_lvb,
 	.dump_lksb	= o2cb_dump_lksb,
 };
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76d41a8ac6..ff4c798a5635 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 	return lksb->lksb_fsdlm.sb_status;
 }
 
+static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
+
+	return !invalid;
+}
+
 static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
 	.dlm_lock	= user_dlm_lock,
 	.dlm_unlock	= user_dlm_unlock,
 	.lock_status	= user_dlm_lock_status,
+	.lvb_valid	= user_dlm_lvb_valid,
 	.lock_lvb	= user_dlm_lvb,
 	.plock		= user_plock,
 	.dump_lksb	= user_dlm_dump_lksb,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 68b668b0e60a..3f2f1c45b7b6 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -6,7 +6,7 @@
  * Code which implements an OCFS2 specific interface to underlying
  * cluster stacks.
  *
- * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2007, 2009 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 }
 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
 
-/*
- * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
- * don't cast at the glue level.  The real answer is that the header
- * ordering is nigh impossible.
- */
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	return active_stack->sp_ops->lvb_valid(lksb);
+}
+EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
+
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	return active_stack->sp_ops->lock_lvb(lksb);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index c571af375ef8..03a44d60eac9 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -185,6 +185,11 @@ struct ocfs2_stack_operations {
 	 */
 	int (*lock_status)(union ocfs2_dlm_lksb *lksb);
 
+	/*
+	 * Return non-zero if the LVB is valid.
+	 */
+	int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
+
 	/*
 	 * Pull the lvb pointer off of the stack-specific lksb.
 	 */
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
 		     struct ocfs2_lock_res *astarg);
 
 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
 
-- 
cgit v1.2.3


From 1962f39abbb2d5643a7d59169422661a2d58793d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 19 Jun 2009 15:36:52 +0800
Subject: ocfs2: Update atime in splice read if necessary.

We should call ocfs2_inode_lock_atime instead of ocfs2_inode_lock
in ocfs2_file_splice_read like we do in ocfs2_file_aio_read so
that we can update atime in splice read if necessary.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 07267e0da909..62442e413a00 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 				      size_t len,
 				      unsigned int flags)
 {
-	int ret = 0;
+	int ret = 0, lock_level = 0;
 	struct inode *inode = in->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 	/*
 	 * See the comment in ocfs2_file_aio_read()
 	 */
-	ret = ocfs2_inode_lock(inode, NULL, 0);
+	ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
 	}
-	ocfs2_inode_unlock(inode, 0);
+	ocfs2_inode_unlock(inode, lock_level);
 
 	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 
-- 
cgit v1.2.3


From 94e41ecfe0f202df948fdbb19a53308a58cf2184 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 19 Jun 2009 14:45:54 -0700
Subject: ocfs2: Pin journal head before accessing jh->b_committed_data

This patch adds jbd_lock_bh_state() and jbd_unlock_bh_state() around accessses
to jh->b_committed_data.

Fixes oss bugzilla#1131
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1131

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/suballoc.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8439f6b324b9..73a16d4666dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr)
 {
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+	int ret;
 
 	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
 		return 0;
-	if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
+
+	if (!buffer_jbd(bg_bh))
 		return 1;
 
+	jbd_lock_bh_state(bg_bh);
 	bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
-	return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+	if (bg)
+		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+	else
+		ret = 1;
+	jbd_unlock_bh_state(bg_bh);
+
+	return ret;
 }
 
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	unsigned int tmp;
 	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	struct ocfs2_group_desc *undo_bg = NULL;
+	int cluster_bitmap = 0;
 
 	mlog_entry_void();
 
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	}
 
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
-		undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
+		cluster_bitmap = 1;
+
+	if (cluster_bitmap) {
+		jbd_lock_bh_state(group_bh);
+		undo_bg = (struct ocfs2_group_desc *)
+					bh2jh(group_bh)->b_committed_data;
+		BUG_ON(!undo_bg);
+	}
 
 	tmp = num_bits;
 	while(tmp--) {
 		ocfs2_clear_bit((bit_off + tmp),
 				(unsigned long *) bg->bg_bitmap);
-		if (ocfs2_is_cluster_bitmap(alloc_inode))
+		if (cluster_bitmap)
 			ocfs2_set_bit(bit_off + tmp,
 				      (unsigned long *) undo_bg->bg_bitmap);
 	}
 	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 
+	if (cluster_bitmap)
+		jbd_unlock_bh_state(group_bh);
+
 	status = ocfs2_journal_dirty(handle, group_bh);
 	if (status < 0)
 		mlog_errno(status);
-- 
cgit v1.2.3


From c3d38840abaa45c1c5a5fabbb8ffc9a0d1a764d1 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 19 Jun 2009 14:45:55 -0700
Subject: ocfs2: Fix ocfs2_osb_dump()

Skip printing information that is not valid for local mounts.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/super.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d33767f17ba3..d64739b593e9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -234,20 +234,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 			"%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
 			osb->s_mount_opt, osb->s_atime_quantum);
 
-	out += snprintf(buf + out, len - out,
-			"%10s => Stack: %s  Name: %*s  Version: %d.%d\n",
-			"Cluster",
-			(*osb->osb_cluster_stack == '\0' ?
-			 "o2cb" : osb->osb_cluster_stack),
-			cconn->cc_namelen, cconn->cc_name,
-			cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
+	if (cconn) {
+		out += snprintf(buf + out, len - out,
+				"%10s => Stack: %s  Name: %*s  "
+				"Version: %d.%d\n", "Cluster",
+				(*osb->osb_cluster_stack == '\0' ?
+				 "o2cb" : osb->osb_cluster_stack),
+				cconn->cc_namelen, cconn->cc_name,
+				cconn->cc_version.pv_major,
+				cconn->cc_version.pv_minor);
+	}
 
 	spin_lock(&osb->dc_task_lock);
 	out += snprintf(buf + out, len - out,
 			"%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
 			"WorkSeq: %lu\n", "DownCnvt",
-			task_pid_nr(osb->dc_task), osb->blocked_lock_count,
-			osb->dc_wake_sequence, osb->dc_work_sequence);
+			(osb->dc_task ?  task_pid_nr(osb->dc_task) : -1),
+			osb->blocked_lock_count, osb->dc_wake_sequence,
+			osb->dc_work_sequence);
 	spin_unlock(&osb->dc_task_lock);
 
 	spin_lock(&osb->osb_lock);
@@ -267,14 +271,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 
 	out += snprintf(buf + out, len - out,
 			"%10s => Pid: %d  Interval: %lu  Needs: %d\n", "Commit",
-			task_pid_nr(osb->commit_task), osb->osb_commit_interval,
+			(osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
+			osb->osb_commit_interval,
 			atomic_read(&osb->needs_checkpoint));
 
 	out += snprintf(buf + out, len - out,
-			"%10s => State: %d  NumTxns: %d  TxnId: %lu\n",
+			"%10s => State: %d  TxnId: %lu  NumTxns: %d\n",
 			"Journal", osb->journal->j_state,
-			atomic_read(&osb->journal->j_num_trans),
-			osb->journal->j_trans_id);
+			osb->journal->j_trans_id,
+			atomic_read(&osb->journal->j_num_trans));
 
 	out += snprintf(buf + out, len - out,
 			"%10s => GlobalAllocs: %d  LocalAllocs: %d  "
@@ -302,7 +307,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 
 	out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
 			"Slots", "Num", "RecoGen");
-
 	for (i = 0; i < osb->max_slots; ++i) {
 		out += snprintf(buf + out, len - out,
 				"%10s  %c %3d  %10d\n",
-- 
cgit v1.2.3


From 692684e19e317a374c18e70a44d6413e51f71c11 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 19 Jun 2009 16:53:17 -0700
Subject: ocfs2: Stop orphan scan as early as possible during umount

Currently if the orphan scan fires a tick before the user issues the umount,
the umount will wait for the queued orphan scan tasks to complete.

This patch makes the umount stop the orphan scan as early as possible so as
to reduce the probability of the queued tasks slowing down the umount.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/journal.c | 14 ++++++++++++--
 fs/ocfs2/ocfs2.h   |  6 ++++++
 fs/ocfs2/super.c   |  5 +++--
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4a3b9e6b31ad..70215a21fb20 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1880,6 +1880,9 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 
 	os = &osb->osb_orphan_scan;
 
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		goto out;
+
 	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
 	if (status < 0) {
 		if (status != -EAGAIN)
@@ -1887,6 +1890,10 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 		goto out;
 	}
 
+	/* Do no queue the tasks if the volume is being umounted */
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		goto unlock;
+
 	if (os->os_seqno != seqno) {
 		os->os_seqno = seqno;
 		goto unlock;
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
 
 	mutex_lock(&os->os_lock);
 	ocfs2_queue_orphan_scan(osb);
-	schedule_delayed_work(&os->os_orphan_scan_work,
-			      ocfs2_orphan_scan_timeout());
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
+		schedule_delayed_work(&os->os_orphan_scan_work,
+				      ocfs2_orphan_scan_timeout());
 	mutex_unlock(&os->os_lock);
 }
 
@@ -1930,6 +1938,7 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
+	atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
 	mutex_lock(&os->os_lock);
 	cancel_delayed_work(&os->os_orphan_scan_work);
 	mutex_unlock(&os->os_lock);
@@ -1940,6 +1949,7 @@ int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
+	atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
 	os->os_osb = osb;
 	os->os_count = 0;
 	os->os_scantime = CURRENT_TIME;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18c1d9ec1c93..60e89503ce5a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -154,6 +154,11 @@ struct ocfs2_lock_res {
 #endif
 };
 
+enum ocfs2_orphan_scan_state {
+	ORPHAN_SCAN_ACTIVE,
+	ORPHAN_SCAN_INACTIVE
+};
+
 struct ocfs2_orphan_scan {
 	struct mutex 		os_lock;
 	struct ocfs2_super 	*os_osb;
@@ -162,6 +167,7 @@ struct ocfs2_orphan_scan {
 	struct timespec		os_scantime;  /* time this node ran the scan */
 	u32			os_count;      /* tracks node specific scans */
 	u32  			os_seqno;       /* tracks cluster wide scans */
+	atomic_t		os_state;              /* ACTIVE or INACTIVE */
 };
 
 struct ocfs2_dlm_debug {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d64739b593e9..3e8a68b103ab 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1814,14 +1814,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	debugfs_remove(osb->osb_ctxt);
 
+	/* Orphan scan should be stopped as early as possible */
+	ocfs2_orphan_scan_stop(osb);
+
 	ocfs2_disable_quotas(osb);
 
 	ocfs2_shutdown_local_alloc(osb);
 
 	ocfs2_truncate_log_shutdown(osb);
 
-	ocfs2_orphan_scan_stop(osb);
-
 	/* This will disable recovery and flush any recovery work. */
 	ocfs2_recovery_exit(osb);
 
-- 
cgit v1.2.3


From 3211949f8998dde71d9fe2e063de045ece5e0473 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 19 Jun 2009 16:53:18 -0700
Subject: ocfs2: Do not initialize lvb in ocfs2_orphan_scan_lock_res_init()

We don't access the LVB in our ocfs2_*_lock_res_init() functions.

Since the LVB can become invalid during some cluster recovery
operations, the dlmglue must be able to handle an uninitialized
LVB.

For the orphan scan lock, we initialized an uninitialzed LVB with our
scan sequence number plus one.  This starts a normal orphan scan
cycle.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 7 +++----
 fs/ocfs2/journal.c | 1 +
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 83d2ddb27186..d3d1f9372f7e 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -644,14 +644,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
 					    struct ocfs2_super *osb)
 {
-	struct ocfs2_orphan_scan_lvb *lvb;
-
 	ocfs2_lock_res_init_once(res);
 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
 				   &ocfs2_orphan_scan_lops, osb);
-	lvb = ocfs2_dlm_lvb(&res->l_lksb);
-	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
 }
 
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -2386,6 +2382,9 @@ int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
+	else
+		*seqno = osb->osb_orphan_scan.os_seqno + 1;
+
 	return status;
 }
 
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 70215a21fb20..0b2c27a9485e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1952,6 +1952,7 @@ int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 	atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
 	os->os_osb = osb;
 	os->os_count = 0;
+	os->os_seqno = 0;
 	os->os_scantime = CURRENT_TIME;
 	mutex_init(&os->os_lock);
 
-- 
cgit v1.2.3


From df152c241df9e9d2b9a65d37bd02961abe7f591a Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 22 Jun 2009 11:40:07 -0700
Subject: ocfs2: Disable orphan scanning for local and hard-ro mounts

Local and Hard-RO mounts do not need orphan scanning.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 26 ++++++++++++++++----------
 fs/ocfs2/dlmglue.h |  4 ++--
 fs/ocfs2/journal.c | 30 +++++++++++++++++-------------
 fs/ocfs2/journal.h |  2 +-
 fs/ocfs2/super.c   | 32 +++++++++++++++-----------------
 5 files changed, 51 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index d3d1f9372f7e..1841bbb49cb6 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2366,15 +2366,20 @@ void ocfs2_inode_unlock(struct inode *inode,
 	mlog_exit_void();
 }
 
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
 {
 	struct ocfs2_lock_res *lockres;
 	struct ocfs2_orphan_scan_lvb *lvb;
-	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 	int status = 0;
 
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	lockres = &osb->osb_orphan_scan.os_lockres;
-	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
 	if (status < 0)
 		return status;
 
@@ -2388,17 +2393,18 @@ int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
 	return status;
 }
 
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
 {
 	struct ocfs2_lock_res *lockres;
 	struct ocfs2_orphan_scan_lvb *lvb;
-	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 
-	lockres = &osb->osb_orphan_scan.os_lockres;
-	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
-	lvb->lvb_os_seqno = cpu_to_be32(seqno);
-	ocfs2_cluster_unlock(osb, lockres, level);
+	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
+		lockres = &osb->osb_orphan_scan.os_lockres;
+		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+		lvb->lvb_os_seqno = cpu_to_be32(seqno);
+		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
+	}
 }
 
 int ocfs2_super_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 31b90d7b8f51..30f683107f1e 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -121,8 +121,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 		     int ex);
 void ocfs2_super_unlock(struct ocfs2_super *osb,
 			int ex);
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex);
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex);
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
 
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 0b2c27a9485e..f033760ecbea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1883,7 +1883,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
 		goto out;
 
-	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
+	status = ocfs2_orphan_scan_lock(osb, &seqno);
 	if (status < 0) {
 		if (status != -EAGAIN)
 			mlog_errno(status);
@@ -1910,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 	os->os_count++;
 	os->os_scantime = CURRENT_TIME;
 unlock:
-	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
+	ocfs2_orphan_scan_unlock(osb, seqno);
 out:
 	return;
 }
@@ -1938,29 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
-	atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
-	mutex_lock(&os->os_lock);
-	cancel_delayed_work(&os->os_orphan_scan_work);
-	mutex_unlock(&os->os_lock);
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
+		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+		mutex_lock(&os->os_lock);
+		cancel_delayed_work(&os->os_orphan_scan_work);
+		mutex_unlock(&os->os_lock);
+	}
 }
 
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 {
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
-	atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
 	os->os_osb = osb;
 	os->os_count = 0;
 	os->os_seqno = 0;
 	os->os_scantime = CURRENT_TIME;
 	mutex_init(&os->os_lock);
+	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
 
-	INIT_DELAYED_WORK(&os->os_orphan_scan_work,
-			  ocfs2_orphan_scan_work);
-	schedule_delayed_work(&os->os_orphan_scan_work,
-			      ocfs2_orphan_scan_timeout());
-	return 0;
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
+		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+	else {
+		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
+		schedule_delayed_work(&os->os_orphan_scan_work,
+				      ocfs2_orphan_scan_timeout());
+	}
 }
 
 struct ocfs2_orphan_filldir_priv {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 61045eeb3f6e..5432c7f79cc6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3e8a68b103ab..0746e1abdfc7 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEBUG_FS
 static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 {
-	int out = 0;
-	int i;
 	struct ocfs2_cluster_connection *cconn = osb->cconn;
 	struct ocfs2_recovery_map *rm = osb->recovery_map;
-	struct ocfs2_orphan_scan *os;
+	struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
+	int i, out = 0;
 
 	out += snprintf(buf + out, len - out,
 			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
@@ -305,6 +304,16 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 			atomic_read(&osb->s_num_inodes_stolen));
 	spin_unlock(&osb->osb_lock);
 
+	out += snprintf(buf + out, len - out, "OrphanScan => ");
+	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
+			os->os_count, os->os_seqno);
+	out += snprintf(buf + out, len - out, " Last Scan: ");
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		out += snprintf(buf + out, len - out, "Disabled\n");
+	else
+		out += snprintf(buf + out, len - out, "%lu seconds ago\n",
+				(get_seconds() - os->os_scantime.tv_sec));
+
 	out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
 			"Slots", "Num", "RecoGen");
 	for (i = 0; i < osb->max_slots; ++i) {
@@ -315,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 				i, osb->slot_recovery_generations[i]);
 	}
 
-	os = &osb->osb_orphan_scan;
-	out += snprintf(buf + out, len - out, "Orphan Scan=> ");
-	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
-			os->os_count, os->os_seqno);
-	out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
-			(get_seconds() - os->os_scantime.tv_sec));
-
 	return out;
 }
 
@@ -1179,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
 	wake_up(&osb->osb_mount_event);
 
+	/* Start this when the mount is almost sure of being successful */
+	ocfs2_orphan_scan_init(osb);
+
 	mlog_exit(status);
 	return status;
 
@@ -1983,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
-	status = ocfs2_orphan_scan_init(osb);
-	if (status) {
-		mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
-		mlog_errno(status);
-		goto bail;
-	}
-
 	init_waitqueue_head(&osb->checkpoint_event);
 	atomic_set(&osb->needs_checkpoint, 0);
 
-- 
cgit v1.2.3


From 9a7aa12f3911853a3574d47d567b81a2a5df7208 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 4 Jun 2009 15:26:49 +0200
Subject: vfs: Set special lockdep map for dirs only if not set by fs

Some filesystems need to set lockdep map for i_mutex differently for
different directories. For example OCFS2 has system directories (for
orphan inode tracking and for gathering all system files like journal
or quota files into a single place) which have different locking
locking rules than standard directories. For a filesystem setting
lockdep map is naturaly done when the inode is read but we have to
modify unlock_new_inode() not to overwrite the lockdep map the filesystem
has set.

Acked-by: peterz@infradead.org
CC: mingo@redhat.com
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/inode.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index f643be565df8..04c785bb63c3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -665,12 +665,17 @@ void unlock_new_inode(struct inode *inode)
 	if (inode->i_mode & S_IFDIR) {
 		struct file_system_type *type = inode->i_sb->s_type;
 
-		/*
-		 * ensure nobody is actually holding i_mutex
-		 */
-		mutex_destroy(&inode->i_mutex);
-		mutex_init(&inode->i_mutex);
-		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+		/* Set new key only if filesystem hasn't already changed it */
+		if (!lockdep_match_class(&inode->i_mutex,
+		    &type->i_mutex_key)) {
+			/*
+			 * ensure nobody is actually holding i_mutex
+			 */
+			mutex_destroy(&inode->i_mutex);
+			mutex_init(&inode->i_mutex);
+			lockdep_set_class(&inode->i_mutex,
+					  &type->i_mutex_dir_key);
+		}
 	}
 #endif
 	/*
-- 
cgit v1.2.3


From cb25797d451dc774d9dbc402a65f16a0e32199fe Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 4 Jun 2009 15:26:50 +0200
Subject: ocfs2: Add lockdep annotations

Add lockdep support to OCFS2. The support also covers all of the cluster
locks except for open locks, journal locks, and local quotafile locks. These
are special because they are acquired for a node, not for a particular process
and lockdep cannot deal with such type of locking.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 81 +++++++++++++++++++++++++++++++++++++++++++-----------
 fs/ocfs2/dlmglue.h | 20 ++++++++++++--
 fs/ocfs2/inode.c   | 11 ++++++++
 fs/ocfs2/namei.c   | 15 ++++++----
 fs/ocfs2/ocfs2.h   |  4 +++
 fs/ocfs2/sysfile.c | 17 ++++++++++++
 6 files changed, 123 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1841bbb49cb6..110bb57c46ab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
 	enum ocfs2_unblock_action unblock_action;
 };
 
+/* Lockdep class keys */
+struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level);
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			     u32 dlm_flags);
 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
 						     int wanted);
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-				 struct ocfs2_lock_res *lockres,
-				 int level);
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+				   struct ocfs2_lock_res *lockres,
+				   int level, unsigned long caller_ip);
+static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
+					struct ocfs2_lock_res *lockres,
+					int level)
+{
+	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
+}
+
 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
 
 	ocfs2_init_lock_stats(res);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (type != OCFS2_LOCK_TYPE_OPEN)
+		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
+				 &lockdep_keys[type], 0);
+	else
+		res->l_lockdep_map.key = NULL;
+#endif
 }
 
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -1252,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
 	return ret;
 }
 
-static int ocfs2_cluster_lock(struct ocfs2_super *osb,
-			      struct ocfs2_lock_res *lockres,
-			      int level,
-			      u32 lkm_flags,
-			      int arg_flags)
+static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
+				struct ocfs2_lock_res *lockres,
+				int level,
+				u32 lkm_flags,
+				int arg_flags,
+				int l_subclass,
+				unsigned long caller_ip)
 {
 	struct ocfs2_mask_waiter mw;
 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1399,13 +1418,37 @@ out:
 	}
 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (!ret && lockres->l_lockdep_map.key != NULL) {
+		if (level == DLM_LOCK_PR)
+			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
+				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+				caller_ip);
+		else
+			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
+				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+				caller_ip);
+	}
+#endif
 	mlog_exit(ret);
 	return ret;
 }
 
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-				 struct ocfs2_lock_res *lockres,
-				 int level)
+static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
+				     struct ocfs2_lock_res *lockres,
+				     int level,
+				     u32 lkm_flags,
+				     int arg_flags)
+{
+	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
+				    0, _RET_IP_);
+}
+
+
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+				   struct ocfs2_lock_res *lockres,
+				   int level,
+				   unsigned long caller_ip)
 {
 	unsigned long flags;
 
@@ -1414,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 	ocfs2_dec_holders(lockres, level);
 	ocfs2_downconvert_on_unlock(osb, lockres);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (lockres->l_lockdep_map.key != NULL)
+		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
+#endif
 	mlog_exit_void();
 }
 
@@ -2159,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
  * returns < 0 error if the callback will never be called, otherwise
  * the result of the lock will be communicated via the callback.
  */
-int ocfs2_inode_lock_full(struct inode *inode,
-			 struct buffer_head **ret_bh,
-			 int ex,
-			 int arg_flags)
+int ocfs2_inode_lock_full_nested(struct inode *inode,
+				 struct buffer_head **ret_bh,
+				 int ex,
+				 int arg_flags,
+				 int subclass)
 {
 	int status, level, acquired;
 	u32 dlm_flags;
@@ -2200,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
 		dlm_flags |= DLM_LKF_NOQUEUE;
 
-	status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
+	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
+				      arg_flags, subclass, _RET_IP_);
 	if (status < 0) {
 		if (status != -EAGAIN && status != -EIOCBRETRY)
 			mlog_errno(status);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 30f683107f1e..7553836931de 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
 /* don't block waiting for the downconvert thread, instead return -EAGAIN */
 #define OCFS2_LOCK_NONBLOCK		(0x04)
 
+/* Locking subclasses of inode cluster lock */
+enum {
+	OI_LS_NORMAL = 0,
+	OI_LS_PARENT,
+	OI_LS_RENAME1,
+	OI_LS_RENAME2,
+};
+
 int ocfs2_dlm_init(struct ocfs2_super *osb);
 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,17 +112,23 @@ void ocfs2_open_unlock(struct inode *inode);
 int ocfs2_inode_lock_atime(struct inode *inode,
 			  struct vfsmount *vfsmnt,
 			  int *level);
-int ocfs2_inode_lock_full(struct inode *inode,
+int ocfs2_inode_lock_full_nested(struct inode *inode,
 			 struct buffer_head **ret_bh,
 			 int ex,
-			 int arg_flags);
+			 int arg_flags,
+			 int subclass);
 int ocfs2_inode_lock_with_page(struct inode *inode,
 			      struct buffer_head **ret_bh,
 			      int ex,
 			      struct page *page);
+/* Variants without special locking class or flags */
+#define ocfs2_inode_lock_full(i, r, e, f)\
+		ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
+#define ocfs2_inode_lock_nested(i, b, e, s)\
+		ocfs2_inode_lock_full_nested(i, b, e, 0, s)
 /* 99% of the time we don't want to supply any additional flags --
  * those are for very specific cases only. */
-#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
 void ocfs2_inode_unlock(struct inode *inode,
 		       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 10e1fa87396a..4dc8890ba316 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -215,6 +215,8 @@ bail:
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 {
 	struct ocfs2_find_inode_args *args = opaque;
+	static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
+				     ocfs2_file_ip_alloc_sem_key;
 
 	mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
 
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	if (args->fi_sysfile_type != 0)
 		lockdep_set_class(&inode->i_mutex,
 			&ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+	if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
+		lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+				  &ocfs2_quota_ip_alloc_sem_key);
+	else
+		lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+				  &ocfs2_file_ip_alloc_sem_key);
 
 	mlog_exit(0);
 	return 0;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 33464c6b60a2..8601f934010b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-	status = ocfs2_inode_lock(dir, NULL, 0);
+	status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
+	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
 		return -EPERM;
 	}
 
-	status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
+	status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
+					 OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			inode1 = tmpinode;
 		}
 		/* lock id2 */
-		status = ocfs2_inode_lock(inode2, bh2, 1);
+		status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+						 OI_LS_RENAME1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 
 	/* lock id1 */
-	status = ocfs2_inode_lock(inode1, bh1, 1);
+	status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * won't have to concurrently downconvert the inode and the
 	 * dentry locks.
 	 */
-	status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
+	status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
+					 OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 60e89503ce5a..c9345ebb8493 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/lockdep.h>
 #ifndef CONFIG_OCFS2_COMPAT_JBD
 # include <linux/jbd2.h>
 #else
@@ -152,6 +153,9 @@ struct ocfs2_lock_res {
 	unsigned int		 l_lock_max_exmode; 	   /* Max wait for EX */
 	unsigned int		 l_lock_refresh;	   /* Disk refreshes */
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	 l_lockdep_map;
+#endif
 };
 
 enum ocfs2_orphan_scan_state {
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index ab713ebdd546..6f53f5e7256a 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,6 +50,8 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
 					   int type,
 					   u32 slot);
 
+static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
+
 static inline int is_global_system_inode(int type)
 {
 	return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +120,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 		inode = NULL;
 		goto bail;
 	}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+	    type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
+	    type == JOURNAL_SYSTEM_INODE) {
+		/* Ignore inode lock on these inodes as the lock does not
+		 * really belong to any process and lockdep cannot handle
+		 * that */
+		OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
+	} else {
+		lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
+								l_lockdep_map,
+				 ocfs2_system_inodes[type].si_name,
+				 &ocfs2_sysfile_cluster_lock_key[type], 0);
+	}
+#endif
 bail:
 
 	return inode;
-- 
cgit v1.2.3


From d246ab307d1d003c80fe279897dea22bf52b6e41 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 18 Jun 2009 13:12:06 +0800
Subject: ocfs2/trivial: Wrap ocfs2_sysfile_cluster_lock_key within define.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Actually ocfs2_sysfile_cluster_lock_key is only used if we enable
CONFIG_DEBUG_LOCK_ALLOC. Wrap it so that we can avoid a building
warning.
fs/ocfs2/sysfile.c:53: warning: ‘ocfs2_sysfile_cluster_lock_key’
defined but not used

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/sysfile.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 6f53f5e7256a..40e53702948c 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,7 +50,9 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
 					   int type,
 					   u32 slot);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
+#endif
 
 static inline int is_global_system_inode(int type)
 {
-- 
cgit v1.2.3


From 4839641333d4593bfc4fb29aa3af10d36f607d5b Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 23 Jun 2009 01:34:19 +0100
Subject: jffs2: fix another potential leak on error path in scan.c

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/scan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7515e73e2bfb..696686cc206e 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	if (jffs2_sum_active()) {
 		s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
 		if (!s) {
-			kfree(flashbuf);
 			JFFS2_WARNING("Can't allocate memory for summary\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 	}
 
-- 
cgit v1.2.3


From 3391faa4f18e4e33666d3d24e90e3086fcf9b922 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 22 Jun 2009 23:12:29 +0200
Subject: udf: remove redundant tests on unsigned

first_block and goal are unsigned. When negative they are wrapped and caught by
the other test.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/balloc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index e48e9a3af763..1e068535b58b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	part_len = sbi->s_partmaps[partition].s_partition_len;
-	if (first_block < 0 || first_block >= part_len)
+	if (first_block >= part_len)
 		goto out;
 
 	if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
 	mutex_lock(&sbi->s_alloc_mutex);
 
 repeat:
-	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+	if (goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 	int8_t etype = -1;
 	struct udf_inode_info *iinfo;
 
-	if (first_block < 0 ||
-		first_block >= sbi->s_partmaps[partition].s_partition_len)
+	if (first_block >= sbi->s_partmaps[partition].s_partition_len)
 		return 0;
 
 	iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
 		return newblock;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+	if (goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	/* We search for the closest matching block to goal. If we find
-- 
cgit v1.2.3


From b5450d9c84bdd38b261922057cd167da51dfae93 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Jun 2009 10:30:07 +0200
Subject: reiserfs: remove stray unlock_super in reiserfs_resize

Reiserfs doesn't use lock_super anywhere internally, and ->remount_fs
which calls reiserfs_resize does have it currently but also expects it
to be held on return, so there's no business for the unlock_super here.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked by Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/resize.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 238e9d9b31e0..18b315d3d104 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 		if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
 			printk
 			    ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
-			unlock_super(s);
 			return -ENOMEM;
 		}
 		/* the new journal bitmaps are zero filled, now we copy in the bitmap
-- 
cgit v1.2.3


From 654f562c526cf9dfb8d453f687341fe0777ee454 Mon Sep 17 00:00:00 2001
From: "J. R. Okajima" <hooanon05@yahoo.co.jp>
Date: Thu, 18 Jun 2009 23:30:15 +0900
Subject: vfs: fix nd->root leak in do_filp_open()

commit 2a737871108de9ba8930f7650d549f1383767f8b "Cache root in nameidata"
introduced a new member nd->root, but forgot to put it in do_filp_open().

Signed-off-by: J. R. Okajima <hooanon05@yahoo.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 527119afb6a5..5b961eb71cbf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (error)
 		return ERR_PTR(error);
 	error = path_walk(pathname, &nd);
-	if (error)
+	if (error) {
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return ERR_PTR(error);
+	}
 	if (unlikely(!audit_dummy_context()))
 		audit_inode(pathname, nd.path.dentry);
 
@@ -1759,6 +1762,8 @@ do_last:
 		}
 		filp = nameidata_to_filp(&nd, open_flag);
 		mnt_drop_write(nd.path.mnt);
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return filp;
 	}
 
@@ -1819,6 +1824,8 @@ ok:
 	 */
 	if (will_write)
 		mnt_drop_write(nd.path.mnt);
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	return filp;
 
 exit_mutex_unlock:
@@ -1859,6 +1866,8 @@ do_link:
 		 * with "intent.open".
 		 */
 		release_open_intent(&nd);
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return ERR_PTR(error);
 	}
 	nd.flags &= ~LOOKUP_PARENT;
-- 
cgit v1.2.3


From 3b22edc5730b87d360ee7dd7143397ba09b73a47 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Jun 2009 17:29:49 -0400
Subject: VFS: Switch init_mount_tree() to use the new create_mnt_ns() helper

Eliminates some duplicated code...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index a7bea8c8bd46..4a86b8595164 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2222,16 +2222,9 @@ static void __init init_mount_tree(void)
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
-	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-	if (!ns)
+	ns = create_mnt_ns(mnt);
+	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-	atomic_set(&ns->count, 1);
-	INIT_LIST_HEAD(&ns->list);
-	init_waitqueue_head(&ns->poll);
-	ns->event = 0;
-	list_add(&mnt->mnt_list, &ns->list);
-	ns->root = mnt;
-	mnt->mnt_ns = ns;
 
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
-- 
cgit v1.2.3


From f6cc746bbb3b8a8ceb8514a7906ba582607a8cf7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 16 Jun 2009 21:15:04 -0700
Subject: devpts: remove module-related code

These days, the devpts filesystem is closely integrated with the pty
memory management, and cannot be built as a module, even less removed
from the kernel.  Accordingly, remove all module-related stuff from
this filesystem.

[ v2: only remove code that's actually dead ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/devpts/inode.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9b1d285f9fe6..75efb028974b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type devpts_fs_type = {
-	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
 	}
 	return err;
 }
-
-static void __exit exit_devpts_fs(void)
-{
-	unregister_filesystem(&devpts_fs_type);
-	mntput(devpts_mnt);
-}
-
 module_init(init_devpts_fs)
-module_exit(exit_devpts_fs)
-MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From c63e09ecccb50f930e899d7005edc5411ee86d4f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 24 Jun 2009 02:05:18 -0400
Subject: Make allocation of anon devices cheaper

Standard trick - add a new variable (start) such that
for each n < start n is known to be busy.  Allocation can
skip checking everything in [0..start) and if it returns
n, we can set start to n + 1.  Freeing below start sets
start to what we'd just freed.

Of course, it still sucks if we do something like
	free 0
	allocate
	allocate
in a loop - still O(n^2) time.  However, on saner loads it
improves the things a lot and the entire thing is not worth
the trouble of switching to something with better worst-case
behaviour.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index d40d53a22fb5..808ffd59e01b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -608,6 +608,7 @@ void emergency_remount(void)
 
 static DEFINE_IDA(unnamed_dev_ida);
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
+static int unnamed_dev_start = 0; /* don't bother trying below it */
 
 int set_anon_super(struct super_block *s, void *data)
 {
@@ -618,7 +619,8 @@ int set_anon_super(struct super_block *s, void *data)
 	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
 		return -ENOMEM;
 	spin_lock(&unnamed_dev_lock);
-	error = ida_get_new(&unnamed_dev_ida, &dev);
+	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
+	unnamed_dev_start = dev + 1;
 	spin_unlock(&unnamed_dev_lock);
 	if (error == -EAGAIN)
 		/* We raced and lost with another CPU. */
@@ -629,6 +631,7 @@ int set_anon_super(struct super_block *s, void *data)
 	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		ida_remove(&unnamed_dev_ida, dev);
+		unnamed_dev_start = dev;
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
@@ -645,6 +648,8 @@ void kill_anon_super(struct super_block *sb)
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	ida_remove(&unnamed_dev_ida, slot);
+	if (slot < unnamed_dev_start)
+		unnamed_dev_start = slot;
 	spin_unlock(&unnamed_dev_lock);
 }
 
-- 
cgit v1.2.3


From f21f62208a6f60e2e05440b2e438d9541822dc4d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 24 Jun 2009 03:12:00 -0400
Subject: ... and the same for vfsmount id/mount group id

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 26 ++++++++++++++++++++++----
 fs/super.c     |  6 ++++--
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 4a86b8595164..3dc283fd4716 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;
+static int mnt_group_start = 1;
 
 static struct list_head *mount_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
 retry:
 	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
 	spin_lock(&vfsmount_lock);
-	res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+	if (!res)
+		mnt_id_start = mnt->mnt_id + 1;
 	spin_unlock(&vfsmount_lock);
 	if (res == -EAGAIN)
 		goto retry;
@@ -79,8 +83,11 @@ retry:
 
 static void mnt_free_id(struct vfsmount *mnt)
 {
+	int id = mnt->mnt_id;
 	spin_lock(&vfsmount_lock);
-	ida_remove(&mnt_id_ida, mnt->mnt_id);
+	ida_remove(&mnt_id_ida, id);
+	if (mnt_id_start > id)
+		mnt_id_start = id;
 	spin_unlock(&vfsmount_lock);
 }
 
@@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt)
  */
 static int mnt_alloc_group_id(struct vfsmount *mnt)
 {
+	int res;
+
 	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
 		return -ENOMEM;
 
-	return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+	res = ida_get_new_above(&mnt_group_ida,
+				mnt_group_start,
+				&mnt->mnt_group_id);
+	if (!res)
+		mnt_group_start = mnt->mnt_group_id + 1;
+
+	return res;
 }
 
 /*
@@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
  */
 void mnt_release_group_id(struct vfsmount *mnt)
 {
-	ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+	int id = mnt->mnt_group_id;
+	ida_remove(&mnt_group_ida, id);
+	if (mnt_group_start > id)
+		mnt_group_start = id;
 	mnt->mnt_group_id = 0;
 }
 
diff --git a/fs/super.c b/fs/super.c
index 808ffd59e01b..2761d3e22ed9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -620,7 +620,8 @@ int set_anon_super(struct super_block *s, void *data)
 		return -ENOMEM;
 	spin_lock(&unnamed_dev_lock);
 	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
-	unnamed_dev_start = dev + 1;
+	if (!error)
+		unnamed_dev_start = dev + 1;
 	spin_unlock(&unnamed_dev_lock);
 	if (error == -EAGAIN)
 		/* We raced and lost with another CPU. */
@@ -631,7 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
 	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		ida_remove(&unnamed_dev_ida, dev);
-		unnamed_dev_start = dev;
+		if (unnamed_dev_start > dev)
+			unnamed_dev_start = dev;
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
-- 
cgit v1.2.3


From 01c031945f2755c7afaaf456088543312f2b72ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 13:35:40 +0200
Subject: cleanup __writeback_single_inode

There is no reason to for the split between __writeback_single_inode and
__sync_single_inode, the former just does a couple of checks before
tail-calling the latter.  So merge the two, and while we're at it split
out the I_SYNC waiting case for data integrity writers, as it's
logically separate function.  Finally rename __writeback_single_inode to
writeback_single_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs-writeback.c | 100 +++++++++++++++++++++++++++---------------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index caf049146ca2..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
 /*
- * Write a single inode's dirty pages and inode data out to disk.
+ * Wait for writeback on an inode to complete.
+ */
+static void inode_wait_for_writeback(struct inode *inode)
+{
+	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+	wait_queue_head_t *wqh;
+
+	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	do {
+		spin_unlock(&inode_lock);
+		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode_lock);
+	} while (inode->i_state & I_SYNC);
+}
+
+/*
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
  * Called under inode_lock.
  */
 static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	unsigned dirty;
 	struct address_space *mapping = inode->i_mapping;
 	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	unsigned dirty;
 	int ret;
 
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		/*
+		 * If this inode is locked for writeback and we are not doing
+		 * writeback-for-data-integrity, move it to s_more_io so that
+		 * writeback can proceed with the other inodes on s_io.
+		 *
+		 * We'll have another go at writing back this inode when we
+		 * completed a full scan of s_io.
+		 */
+		if (!wait) {
+			requeue_io(inode);
+			return 0;
+		}
+
+		/*
+		 * It's a data-integrity sync.  We must wait.
+		 */
+		inode_wait_for_writeback(inode);
+	}
+
 	BUG_ON(inode->i_state & I_SYNC);
 
 	/* Set I_SYNC, reset I_DIRTY */
@@ -389,50 +433,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-/*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
- */
-static int
-__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	wait_queue_head_t *wqh;
-
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
-
-	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-		/*
-		 * We're skipping this inode because it's locked, and we're not
-		 * doing writeback-for-data-integrity.  Move it to s_more_io so
-		 * that writeback can proceed with the other inodes on s_io.
-		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
-		 */
-		requeue_io(inode);
-		return 0;
-	}
-
-	/*
-	 * It's a data-integrity sync.  We must wait.
-	 */
-	if (inode->i_state & I_SYNC) {
-		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-
-		wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-		do {
-			spin_unlock(&inode_lock);
-			__wait_on_bit(wqh, &wq, inode_wait,
-							TASK_UNINTERRUPTIBLE);
-			spin_lock(&inode_lock);
-		} while (inode->i_state & I_SYNC);
-	}
-	return __sync_single_inode(inode, wbc);
-}
-
 /*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
@@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
-		__writeback_single_inode(inode, wbc);
+		writeback_single_inode(inode, wbc);
 		if (current_is_pdflush())
 			writeback_release(bdi);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
 
 	might_sleep();
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, &wbc);
+	ret = writeback_single_inode(inode, &wbc);
 	spin_unlock(&inode_lock);
 	if (sync)
 		inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret;
 
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, wbc);
+	ret = writeback_single_inode(inode, wbc);
 	spin_unlock(&inode_lock);
 	return ret;
 }
-- 
cgit v1.2.3


From 3e63cbb1efca7dd3137de1bb475e2e068e38ef23 Mon Sep 17 00:00:00 2001
From: Ankit Jain <me@ankitjain.org>
Date: Fri, 19 Jun 2009 14:28:07 -0400
Subject: fs: Add new pre-allocation ioctls to vfs for compatibility with
 legacy xfs ioctls

This patch adds ioctls to vfs for compatibility with legacy XFS
pre-allocation ioctls (XFS_IOC_*RESVP*). The implementation
effectively invokes sys_fallocate for the new ioctls.
Also handles the compat_ioctl case.
Note: These legacy ioctls are also implemented by OCFS2.

[AV: folded fixes from hch]

Signed-off-by: Ankit Jain <me@ankitjain.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat_ioctl.c | 48 +++++++++++++++++++++++++++++++++++++++++++++
 fs/ioctl.c        | 35 +++++++++++++++++++++++++++++++++
 fs/open.c         | 58 +++++++++++++++++++++++++++----------------------------
 3 files changed, 112 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c135202c38b3..626c7483b4de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vt.h>
+#include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
@@ -1779,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return sys_ioctl(fd, cmd, (unsigned long)tn);
 }
 
+/* on ia32 l_start is on a 32-bit boundary */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+struct space_resv_32 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+};
+
+#define FS_IOC_RESVSP_32		_IOW ('X', 40, struct space_resv_32)
+#define FS_IOC_RESVSP64_32	_IOW ('X', 42, struct space_resv_32)
+
+/* just account for different alignment */
+static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
+{
+	struct space_resv_32	__user *p32 = (void __user *)arg;
+	struct space_resv	__user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(&p->l_type,	&p32->l_type,	sizeof(s16)) ||
+	    copy_in_user(&p->l_whence,	&p32->l_whence, sizeof(s16)) ||
+	    copy_in_user(&p->l_start,	&p32->l_start,	sizeof(s64)) ||
+	    copy_in_user(&p->l_len,	&p32->l_len,	sizeof(s64)) ||
+	    copy_in_user(&p->l_sysid,	&p32->l_sysid,	sizeof(s32)) ||
+	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
+	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
+		return -EFAULT;
+
+	return ioctl_preallocate(file, p);
+}
+#endif
+
 
 typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
 					unsigned long, struct file *);
@@ -2756,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	case FIOQSIZE:
 		break;
 
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+	case FS_IOC_RESVSP_32:
+	case FS_IOC_RESVSP64_32:
+		error = compat_ioctl_preallocate(filp, arg);
+		goto out_fput;
+#else
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		error = ioctl_preallocate(filp, (void __user *)arg);
+		goto out_fput;
+#endif
+
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 001f8d3118f2..5612880fcbe7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
+#include <linux/falloc.h>
 
 #include <asm/ioctls.h>
 
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
 
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here, rest are ignored.
+ */
+int ioctl_preallocate(struct file *filp, void __user *argp)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct space_resv sr;
+
+	if (copy_from_user(&sr, argp, sizeof(sr)))
+		return -EFAULT;
+
+	switch (sr.l_whence) {
+	case SEEK_SET:
+		break;
+	case SEEK_CUR:
+		sr.l_start += filp->f_pos;
+		break;
+	case SEEK_END:
+		sr.l_start += i_size_read(inode);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 		return ioctl_fibmap(filp, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		return ioctl_preallocate(filp, p);
 	}
 
 	return vfs_ioctl(filp, cmd, arg);
diff --git a/fs/open.c b/fs/open.c
index 7200e23d9258..dd98e8076024 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
 #endif
 #endif /* BITS_PER_LONG == 32 */
 
-SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+
+int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-	struct file *file;
-	struct inode *inode;
-	long ret = -EINVAL;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	long ret;
 
 	if (offset < 0 || len <= 0)
-		goto out;
+		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	ret = -EOPNOTSUPP;
 	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
-		goto out;
+		return -EOPNOTSUPP;
 
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
 	if (!(file->f_mode & FMODE_WRITE))
-		goto out_fput;
+		return -EBADF;
 	/*
 	 * Revalidate the write permissions, in case security policy has
 	 * changed since the files were opened.
 	 */
 	ret = security_file_permission(file, MAY_WRITE);
 	if (ret)
-		goto out_fput;
+		return ret;
 
-	inode = file->f_path.dentry->d_inode;
-
-	ret = -ESPIPE;
 	if (S_ISFIFO(inode->i_mode))
-		goto out_fput;
+		return -ESPIPE;
 
-	ret = -ENODEV;
 	/*
 	 * Let individual file system decide if it supports preallocation
 	 * for directories or not.
 	 */
 	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
-		goto out_fput;
+		return -ENODEV;
 
-	ret = -EFBIG;
 	/* Check for wrap through zero too */
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
-		goto out_fput;
+		return -EFBIG;
 
-	if (inode->i_op->fallocate)
-		ret = inode->i_op->fallocate(inode, mode, offset, len);
-	else
-		ret = -EOPNOTSUPP;
+	if (!inode->i_op->fallocate)
+		return -EOPNOTSUPP;
 
-out_fput:
-	fput(file);
-out:
-	return ret;
+	return inode->i_op->fallocate(inode, mode, offset, len);
 }
+
+SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+{
+	struct file *file;
+	int error = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		error = do_fallocate(file, mode, offset, len);
+		fput(file);
+	}
+
+	return error;
+}
+
 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
 {
-- 
cgit v1.2.3


From f19d4a8fa6f9b6ccf54df0971c97ffcaa390b7b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:50:45 -0400
Subject: add caching of ACLs in struct inode

No helpers, no conversions yet.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index f643be565df8..e193cd592fa8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
+#include <linux/posix_acl.h>
 
 /*
  * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	}
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+#endif
 
 #ifdef CONFIG_FSNOTIFY
 	inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
 	ima_inode_free(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_acl);
+	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_default_acl);
+#endif
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-- 
cgit v1.2.3


From 5e78b435683daaaacadad1b2aeefb8904cf6acfb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:52:55 -0400
Subject: switch ext2 to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/acl.c   | 24 +++++++++++-------------
 fs/ext2/acl.h   |  4 ----
 fs/ext2/ext2.h  |  4 ----
 fs/ext2/inode.c |  4 ----
 fs/ext2/super.c | 16 ----------------
 5 files changed, 11 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d46e38cb85c5..d2ffddc12117 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -128,10 +128,10 @@ fail:
 static inline struct posix_acl *
 ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 {
-	struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
+	struct posix_acl *acl = ACL_NOT_CACHED;
 
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT2_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		acl = posix_acl_dup(*i_acl);
 	spin_unlock(&inode->i_lock);
 
@@ -143,7 +143,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 		   struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT2_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -155,7 +155,6 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext2_get_acl(struct inode *inode, int type)
 {
-	struct ext2_inode_info *ei = EXT2_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -166,15 +165,15 @@ ext2_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			acl = ext2_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT2_ACL_NOT_CACHED)
+			acl = ext2_iget_acl(inode, &inode->i_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			acl = ext2_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT2_ACL_NOT_CACHED)
+			acl = ext2_iget_acl(inode, &inode->i_default_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
 			break;
@@ -200,11 +199,11 @@ ext2_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &ei->i_acl, acl);
+				ext2_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &ei->i_default_acl, acl);
+				ext2_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
@@ -217,7 +216,6 @@ ext2_get_acl(struct inode *inode, int type)
 static int
 ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct ext2_inode_info *ei = EXT2_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -266,11 +264,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	if (!error) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &ei->i_acl, acl);
+				ext2_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &ei->i_default_acl, acl);
+				ext2_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index b42cf578554b..ecefe478898f 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
 
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
-/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT2_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext2_permission (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d988a718aedb..9a8a8e27a063 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -46,10 +46,6 @@ struct ext2_inode_info {
 	 * EAs.
 	 */
 	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
 #endif
 	rwlock_t i_meta_lock;
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 29ed682061f6..e27130341d4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT2_I(inode);
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	ei->i_acl = EXT2_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 
 	raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 458999638c3d..1a9ffee47d56 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	ei->i_acl = EXT2_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
 static void ext2_clear_inode(struct inode *inode)
 {
 	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	struct ext2_inode_info *ei = EXT2_I(inode);
-
-	if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
-		posix_acl_release(ei->i_acl);
-		ei->i_acl = EXT2_ACL_NOT_CACHED;
-	}
-	if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
-		posix_acl_release(ei->i_default_acl);
-		ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-	}
-#endif
 	ext2_discard_reservation(inode);
 	EXT2_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
-- 
cgit v1.2.3


From 6582a0e6f6bc7bf64817b9e1a424782855292ab0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:53:58 -0400
Subject: switch ext3 to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/acl.c   | 22 ++++++++++------------
 fs/ext3/acl.h   |  4 ----
 fs/ext3/inode.c |  4 ----
 fs/ext3/super.c | 16 ----------------
 4 files changed, 10 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e0c745451715..a9707689d9e1 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -134,7 +134,7 @@ ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 	if (acl) {
 		spin_lock(&inode->i_lock);
 		acl = *i_acl;
-		if (acl != EXT3_ACL_NOT_CACHED)
+		if (acl != ACL_NOT_CACHED)
 			acl = posix_acl_dup(acl);
 		spin_unlock(&inode->i_lock);
 	}
@@ -147,7 +147,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
                   struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT3_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -161,7 +161,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -172,15 +171,15 @@ ext3_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext3_iget_acl(inode, &inode->i_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext3_iget_acl(inode, &inode->i_default_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
 			break;
@@ -206,11 +205,11 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext3_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext3_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
@@ -226,7 +225,6 @@ static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -274,11 +272,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 	if (!error) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext3_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext3_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac0..07d15a3a5969 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
 
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
-/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT3_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 05dea8132fc0..5f51fed5c750 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT3_I(inode);
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 
 	ret = __ext3_get_inode_loc(inode, &iloc, 0);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 601e881e6105..524b349c6299 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
 static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	if (EXT3_I(inode)->i_acl &&
-			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_acl);
-		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-	}
-	if (EXT3_I(inode)->i_default_acl &&
-			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_default_acl);
-		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
-	}
-#endif
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
-- 
cgit v1.2.3


From d4bfe2f76d785cc77611a4bda8cedaff358d8c7d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:54:26 -0400
Subject: switch ext4 to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/acl.c   | 22 ++++++++++------------
 fs/ext4/acl.h   |  4 ----
 fs/ext4/ext4.h  |  4 ----
 fs/ext4/inode.c |  4 ----
 fs/ext4/super.c | 16 ----------------
 5 files changed, 10 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 605aeed96d68..0084e3a19d86 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -134,7 +134,7 @@ ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 	if (acl) {
 		spin_lock(&inode->i_lock);
 		acl = *i_acl;
-		if (acl != EXT4_ACL_NOT_CACHED)
+		if (acl != ACL_NOT_CACHED)
 			acl = posix_acl_dup(acl);
 		spin_unlock(&inode->i_lock);
 	}
@@ -147,7 +147,7 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 		struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT4_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -161,7 +161,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext4_get_acl(struct inode *inode, int type)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -172,15 +171,15 @@ ext4_get_acl(struct inode *inode, int type)
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = ext4_iget_acl(inode, &ei->i_acl);
-		if (acl != EXT4_ACL_NOT_CACHED)
+		acl = ext4_iget_acl(inode, &inode->i_acl);
+		if (acl != ACL_NOT_CACHED)
 			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 		break;
 
 	case ACL_TYPE_DEFAULT:
-		acl = ext4_iget_acl(inode, &ei->i_default_acl);
-		if (acl != EXT4_ACL_NOT_CACHED)
+		acl = ext4_iget_acl(inode, &inode->i_default_acl);
+		if (acl != ACL_NOT_CACHED)
 			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 		break;
@@ -206,11 +205,11 @@ ext4_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch (type) {
 		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &ei->i_acl, acl);
+			ext4_iset_acl(inode, &inode->i_acl, acl);
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &ei->i_default_acl, acl);
+			ext4_iset_acl(inode, &inode->i_default_acl, acl);
 			break;
 		}
 	}
@@ -226,7 +225,6 @@ static int
 ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -274,11 +272,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	if (!error) {
 		switch (type) {
 		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &ei->i_acl, acl);
+			ext4_iset_acl(inode, &inode->i_acl, acl);
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &ei->i_default_acl, acl);
+			ext4_iset_acl(inode, &inode->i_default_acl, acl);
 			break;
 		}
 	}
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
 
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
-/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT4_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext4_permission(struct inode *, int);
 extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 17b9998680e3..0ddf7e55abe1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -595,10 +595,6 @@ struct ext4_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
 
 	struct list_head i_orphan;	/* unlinked but open inodes */
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c17ae275af4..60a26f3a6f8b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	ei->i_acl = EXT4_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8bb9e2d3e4b8..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	if (!ei)
 		return NULL;
 
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	ei->i_acl = EXT4_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
 	ei->vfs_inode.i_version = 1;
 	ei->vfs_inode.i_data.writeback_index = 0;
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -735,18 +731,6 @@ static void destroy_inodecache(void)
 
 static void ext4_clear_inode(struct inode *inode)
 {
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	if (EXT4_I(inode)->i_acl &&
-			EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
-		posix_acl_release(EXT4_I(inode)->i_acl);
-		EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
-	}
-	if (EXT4_I(inode)->i_default_acl &&
-			EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
-		posix_acl_release(EXT4_I(inode)->i_default_acl);
-		EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
-	}
-#endif
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
-- 
cgit v1.2.3


From 05fc0790b6c9c611129f2f712d00b6a8a364e8d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:54:52 -0400
Subject: switch jfs to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jfs/acl.c        | 22 +++++++++-------------
 fs/jfs/jfs_incore.h |  6 ------
 fs/jfs/super.c      | 16 ----------------
 fs/jfs/xattr.c      | 12 ++++++------
 4 files changed, 15 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 06ca1b8d2054..5fcfc9857c11 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,7 +31,6 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *ea_name;
-	struct jfs_inode_info *ji = JFS_IP(inode);
 	struct posix_acl **p_acl;
 	int size;
 	char *value = NULL;
@@ -39,17 +38,17 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &ji->i_acl;
+			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &ji->i_default_acl;
+			p_acl = &inode->i_default_acl;
 			break;
 		default:
 			return ERR_PTR(-EINVAL);
 	}
 
-	if (*p_acl != JFS_ACL_NOT_CACHED)
+	if (*p_acl != ACL_NOT_CACHED)
 		return posix_acl_dup(*p_acl);
 
 	size = __jfs_getxattr(inode, ea_name, NULL, 0);
@@ -80,7 +79,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 		       struct posix_acl *acl)
 {
 	char *ea_name;
-	struct jfs_inode_info *ji = JFS_IP(inode);
 	struct posix_acl **p_acl;
 	int rc;
 	int size = 0;
@@ -92,11 +90,11 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &ji->i_acl;
+			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &ji->i_default_acl;
+			p_acl = &inode->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
 			break;
@@ -117,7 +115,7 @@ out:
 	kfree(value);
 
 	if (!rc) {
-		if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
+		if (*p_acl && (*p_acl != ACL_NOT_CACHED))
 			posix_acl_release(*p_acl);
 		*p_acl = posix_acl_dup(acl);
 	}
@@ -126,17 +124,15 @@ out:
 
 static int jfs_check_acl(struct inode *inode, int mask)
 {
-	struct jfs_inode_info *ji = JFS_IP(inode);
-
-	if (ji->i_acl == JFS_ACL_NOT_CACHED) {
+	if (inode->i_acl == ACL_NOT_CACHED) {
 		struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		posix_acl_release(acl);
 	}
 
-	if (ji->i_acl)
-		return posix_acl_permission(inode, ji->i_acl, mask);
+	if (inode->i_acl)
+		return posix_acl_permission(inode, inode->i_acl, mask);
 	return -EAGAIN;
 }
 
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 439901d205fe..1439f119ec83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -74,10 +74,6 @@ struct jfs_inode_info {
 	/* xattr_sem allows us to access the xattrs without taking i_mutex */
 	struct rw_semaphore xattr_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
-#ifdef CONFIG_JFS_POSIX_ACL
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
-#endif
 	union {
 		struct {
 			xtpage_t _xtroot;	/* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
 #define i_inline u.link._inline
 #define i_inline_ea u.link._inline_ea
 
-#define JFS_ACL_NOT_CACHED ((void *)-1)
-
 #define IREAD_LOCK(ip, subclass) \
 	down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
 #define IREAD_UNLOCK(ip)	up_read(&JFS_IP(ip)->rdwrlock)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 09b1b6ee2186..37e6dcda8fc8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
 		ji->active_ag = -1;
 	}
 	spin_unlock_irq(&ji->ag_lock);
-
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (ji->i_acl != JFS_ACL_NOT_CACHED) {
-		posix_acl_release(ji->i_acl);
-		ji->i_acl = JFS_ACL_NOT_CACHED;
-	}
-	if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
-		posix_acl_release(ji->i_default_acl);
-		ji->i_default_acl = JFS_ACL_NOT_CACHED;
-	}
-#endif
-
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
@@ -798,10 +786,6 @@ static void init_once(void *foo)
 	init_rwsem(&jfs_ip->xattr_sem);
 	spin_lock_init(&jfs_ip->ag_lock);
 	jfs_ip->active_ag = -1;
-#ifdef CONFIG_JFS_POSIX_ACL
-	jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
-	jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
-#endif
 	inode_init_once(&jfs_ip->vfs_inode);
 }
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 61dfa8173ebc..f6e90e343593 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,10 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the ACL.  Get rid of the cached one
 		 */
-		acl =JFS_IP(inode)->i_acl;
-		if (acl != JFS_ACL_NOT_CACHED)
+		acl =inode->i_acl;
+		if (acl != ACL_NOT_CACHED)
 			posix_acl_release(acl);
-		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
+		inode->i_acl = ACL_NOT_CACHED;
 
 		return 0;
 	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +746,10 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the default ACL.  Get rid of the cached one
 		 */
-		acl =JFS_IP(inode)->i_default_acl;
-		if (acl && (acl != JFS_ACL_NOT_CACHED))
+		acl = inode->i_default_acl;
+		if (acl && (acl != ACL_NOT_CACHED))
 			posix_acl_release(acl);
-		JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
+		inode->i_default_acl = ACL_NOT_CACHED;
 
 		return 0;
 	}
-- 
cgit v1.2.3


From 290c263bf83cd78e53b1aa3b42165f588163f2be Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:55:12 -0400
Subject: switch jffs2 to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jffs2/acl.c        | 54 ++++++++++++++++++---------------------------------
 fs/jffs2/acl.h        |  4 ----
 fs/jffs2/jffs2_fs_i.h |  4 ----
 fs/jffs2/os-linux.h   |  4 ----
 fs/jffs2/readinode.c  |  1 -
 5 files changed, 19 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 043740dde20c..ac16589ebbd1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -158,10 +158,10 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 
 static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 {
-	struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
+	struct posix_acl *acl = ACL_NOT_CACHED;
 
 	spin_lock(&inode->i_lock);
-	if (*i_acl != JFFS2_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		acl = posix_acl_dup(*i_acl);
 	spin_unlock(&inode->i_lock);
 	return acl;
@@ -170,7 +170,7 @@ static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **
 static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != JFFS2_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -178,21 +178,20 @@ static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct
 
 static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct posix_acl *acl;
 	char *value = NULL;
 	int rc, xprefix;
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = jffs2_iget_acl(inode, &f->i_acl_access);
-		if (acl != JFFS2_ACL_NOT_CACHED)
+		acl = jffs2_iget_acl(inode, &inode->i_acl);
+		if (acl != ACL_NOT_CACHED)
 			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
 		break;
 	case ACL_TYPE_DEFAULT:
-		acl = jffs2_iget_acl(inode, &f->i_acl_default);
-		if (acl != JFFS2_ACL_NOT_CACHED)
+		acl = jffs2_iget_acl(inode, &inode->i_default_acl);
+		if (acl != ACL_NOT_CACHED)
 			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
 		break;
@@ -218,10 +217,10 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch (type) {
 		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &f->i_acl_access, acl);
+			jffs2_iset_acl(inode, &inode->i_acl, acl);
 			break;
 		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
+			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
 			break;
 		}
 	}
@@ -249,7 +248,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
 
 static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	int rc, xprefix;
 
 	if (S_ISLNK(inode->i_mode))
@@ -288,10 +286,10 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	if (!rc) {
 		switch(type) {
 		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &f->i_acl_access, acl);
+			jffs2_iset_acl(inode, &inode->i_acl, acl);
 			break;
 		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
+			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
 			break;
 		}
 	}
@@ -321,12 +319,11 @@ int jffs2_permission(struct inode *inode, int mask)
 
 int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct posix_acl *acl, *clone;
 	int rc;
 
-	f->i_acl_default = NULL;
-	f->i_acl_access = NULL;
+	inode->i_default_acl = NULL;
+	inode->i_acl = NULL;
 
 	if (S_ISLNK(*i_mode))
 		return 0;	/* Symlink always has no-ACL */
@@ -339,7 +336,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 		*i_mode &= ~current_umask();
 	} else {
 		if (S_ISDIR(*i_mode))
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
+			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
 
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		if (!clone)
@@ -350,7 +347,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 			return rc;
 		}
 		if (rc > 0)
-			jffs2_iset_acl(inode, &f->i_acl_access, clone);
+			jffs2_iset_acl(inode, &inode->i_acl, clone);
 
 		posix_acl_release(clone);
 	}
@@ -359,17 +356,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 
 int jffs2_init_acl_post(struct inode *inode)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	int rc;
 
-	if (f->i_acl_default) {
-		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default);
+	if (inode->i_default_acl) {
+		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
 		if (rc)
 			return rc;
 	}
 
-	if (f->i_acl_access) {
-		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access);
+	if (inode->i_acl) {
+		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
 		if (rc)
 			return rc;
 	}
@@ -377,18 +373,6 @@ int jffs2_init_acl_post(struct inode *inode)
 	return 0;
 }
 
-void jffs2_clear_acl(struct jffs2_inode_info *f)
-{
-	if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
-		posix_acl_release(f->i_acl_access);
-		f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-	}
-	if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
-		posix_acl_release(f->i_acl_default);
-		f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-	}
-}
-
 int jffs2_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8ca058aed384..fc929f2a14f6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-#define JFFS2_ACL_NOT_CACHED ((void *)-1)
-
 extern int jffs2_permission(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
-extern void jffs2_clear_acl(struct jffs2_inode_info *);
 
 extern struct xattr_handler jffs2_acl_access_xattr_handler;
 extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
 #define jffs2_acl_chmod(inode)			(0)
 #define jffs2_init_acl_pre(dir_i,inode,mode)	(0)
 #define jffs2_init_acl_post(inode)		(0)
-#define jffs2_clear_acl(f)
 
 #endif	/* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 4c41db91eaa4..c6923da98263 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
 	uint16_t flags;
 	uint8_t usercompr;
 	struct inode vfs_inode;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-	struct posix_acl *i_acl_access;
-	struct posix_acl *i_acl_default;
-#endif
 };
 
 #endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2228380c47b9..a7f03b7ebcb3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
 	f->target = NULL;
 	f->flags = 0;
 	f->usercompr = 0;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-	f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-	f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-#endif
 }
 
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1fc1e92356ee..1a80301004b8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
 	struct jffs2_full_dirent *fd, *fds;
 	int deleted;
 
-	jffs2_clear_acl(f);
 	jffs2_xattr_delete_inode(c, f->inocache);
 	mutex_lock(&f->sem);
 	deleted = f->inocache && !f->inocache->pino_nlink;
-- 
cgit v1.2.3


From 5affd88a104af43f0063a12ad1ee4c7a587945dc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:55:32 -0400
Subject: switch btrfs to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/acl.c         | 14 +++++++-------
 fs/btrfs/btrfs_inode.h |  4 ----
 fs/btrfs/ctree.h       |  2 --
 fs/btrfs/inode.c       | 16 ++--------------
 4 files changed, 9 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 603972576f0f..6db8a42a3e5e 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -34,7 +34,7 @@ static void btrfs_update_cached_acl(struct inode *inode,
 				    struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+	if (*p_acl && *p_acl != ACL_NOT_CACHED)
 		posix_acl_release(*p_acl);
 	*p_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -50,11 +50,11 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &BTRFS_I(inode)->i_acl;
+		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &BTRFS_I(inode)->i_default_acl;
+		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return ERR_PTR(-EINVAL);
@@ -67,11 +67,11 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 
 	spin_lock(&inode->i_lock);
 	acl = *p_acl;
-	if (acl != BTRFS_ACL_NOT_CACHED)
+	if (acl != ACL_NOT_CACHED)
 		acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
 
-	if (acl != BTRFS_ACL_NOT_CACHED)
+	if (acl != ACL_NOT_CACHED)
 		return acl;
 
 	size = __btrfs_getxattr(inode, name, "", 0);
@@ -141,13 +141,13 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 		ret = 0;
 		inode->i_mode = mode;
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &BTRFS_I(inode)->i_acl;
+		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EINVAL : 0;
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &BTRFS_I(inode)->i_default_acl;
+		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index acb4f3517582..ea1ea0af8c0e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -53,10 +53,6 @@ struct btrfs_inode {
 	/* used to order data wrt metadata */
 	struct btrfs_ordered_inode_tree ordered_tree;
 
-	/* standard acl pointers */
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
-
 	/* for keeping track of orphaned inodes */
 	struct list_head i_orphan;
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..2779c2f5360a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
 
-#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
-
 #define BTRFS_MAX_LEVEL 8
 
 #define BTRFS_COMPAT_EXTENT_TREE_V0
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8612b3a09811..78ad38ddd01f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2123,8 +2123,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	 */
 	maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
 	if (!maybe_acls) {
-		BTRFS_I(inode)->i_acl = NULL;
-		BTRFS_I(inode)->i_default_acl = NULL;
+		inode->i_acl = NULL;
+		inode->i_default_acl = NULL;
 	}
 
 	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
@@ -3141,9 +3141,6 @@ static noinline void init_btrfs_i(struct inode *inode)
 {
 	struct btrfs_inode *bi = BTRFS_I(inode);
 
-	bi->i_acl = BTRFS_ACL_NOT_CACHED;
-	bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
-
 	bi->generation = 0;
 	bi->sequence = 0;
 	bi->last_trans = 0;
@@ -4640,8 +4637,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_trans = 0;
 	ei->logged_trans = 0;
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
-	ei->i_acl = BTRFS_ACL_NOT_CACHED;
-	ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
 	INIT_LIST_HEAD(&ei->i_orphan);
 	INIT_LIST_HEAD(&ei->ordered_operations);
 	return &ei->vfs_inode;
@@ -4655,13 +4650,6 @@ void btrfs_destroy_inode(struct inode *inode)
 	WARN_ON(!list_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
 
-	if (BTRFS_I(inode)->i_acl &&
-	    BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
-		posix_acl_release(BTRFS_I(inode)->i_acl);
-	if (BTRFS_I(inode)->i_default_acl &&
-	    BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
-		posix_acl_release(BTRFS_I(inode)->i_default_acl);
-
 	/*
 	 * Make sure we're properly removed from the ordered operation
 	 * lists.
-- 
cgit v1.2.3


From d441b1c293149212045de00f346c8ea6cd41cce4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:56:34 -0400
Subject: switch nilfs2 to inode->i_acl

Actually, get rid of private analog, since nothing in there is
using ACLs at all so far.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nilfs2/inode.c |  8 --------
 fs/nilfs2/nilfs.h |  4 ----
 fs/nilfs2/super.c | 10 ----------
 3 files changed, 22 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2696d6b513b7..fe9d8f2a13f8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
 	/* ii->i_file_acl = 0; */
 	/* ii->i_dir_acl = 0; */
 	ii->i_dir_start_lookup = 0;
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-	ii->i_acl = NULL;
-	ii->i_default_acl = NULL;
-#endif
 	ii->i_cno = 0;
 	nilfs_set_inode_flags(inode);
 	spin_lock(&sbi->s_next_gen_lock);
@@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
 
 	raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
 
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-	ii->i_acl = NILFS_ACL_NOT_CACHED;
-	ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-#endif
 	if (nilfs_read_inode_common(inode, raw_inode))
 		goto failed_unmap;
 
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index edf6a59d9f2a..724c63766e82 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -57,10 +57,6 @@ struct nilfs_inode_info {
 	 * EAs.
 	 */
 	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_NILFS_POSIX_ACL
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
 #endif
 	struct buffer_head *i_bh;	/* i_bh contains a new or dirty
 					   disk inode */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ab785f85aa50..8e2ec43b18f4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 
-#ifdef CONFIG_NILFS_POSIX_ACL
-	if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
-		posix_acl_release(ii->i_acl);
-		ii->i_acl = NILFS_ACL_NOT_CACHED;
-	}
-	if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
-		posix_acl_release(ii->i_default_acl);
-		ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-	}
-#endif
 	/*
 	 * Free resources allocated in nilfs_read_inode(), here.
 	 */
-- 
cgit v1.2.3


From e68888bcb60ccba4dc21df9f2d8cd7325b64dce7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 20:46:54 -0400
Subject: reiserfs: minimal fix for ACL caching

reiserfs uses NULL as "unknown" and ERR_PTR(-ENODATA) as "no ACL";
several codepaths store the former instead of the latter.

All those codepaths go through iset_acl() and all cases when it's
called with NULL acl are for the second variety, so the minimal
fix is to teach iset_acl() to deal with that.

Proper fix is to switch to more usual conventions and avoid back
and forth between internally used ERR_PTR(-ENODATA) and NULL
expected by the rest of the kernel.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/xattr_acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c303c426fe2b..a1a7e3530e17 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -194,7 +194,7 @@ static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
 	spin_lock(&inode->i_lock);
 	if (*i_acl != ERR_PTR(-ENODATA))
 		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
+	*i_acl = acl ? posix_acl_dup(acl) : ERR_PTR(-ENODATA);
 	spin_unlock(&inode->i_lock);
 }
 
-- 
cgit v1.2.3


From 7a77b15d9294749809de918e24bebc39e0fbc9ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 21:01:13 -0400
Subject: switch reiserfs to usual conventions for caching ACLs

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/super.c     | 12 ++++++------
 fs/reiserfs/xattr_acl.c | 21 ++++++++-------------
 2 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc22..b194451fc04b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -530,8 +530,8 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	inode_init_once(&ei->vfs_inode);
 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	ei->i_acl_access = NULL;
-	ei->i_acl_default = NULL;
+	ei->i_acl_access = ACL_NOT_CACHED;
+	ei->i_acl_default = ACL_NOT_CACHED;
 #endif
 }
 
@@ -586,14 +586,14 @@ static void reiserfs_clear_inode(struct inode *inode)
 	struct posix_acl *acl;
 
 	acl = REISERFS_I(inode)->i_acl_access;
-	if (acl && !IS_ERR(acl))
+	if (acl && acl != ACL_NOT_CACHED)
 		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_access = NULL;
+	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
 
 	acl = REISERFS_I(inode)->i_acl_default;
-	if (acl && !IS_ERR(acl))
+	if (acl && acl != ACL_NOT_CACHED)
 		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_default = NULL;
+	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
 }
 #else
 #define reiserfs_clear_inode NULL
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a1a7e3530e17..7b3aeb9327d3 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -192,19 +192,19 @@ static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
 			    struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
-	*i_acl = acl ? posix_acl_dup(acl) : ERR_PTR(-ENODATA);
+	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
 }
 
 static inline struct posix_acl *iget_acl(struct inode *inode,
 					 struct posix_acl **i_acl)
 {
-	struct posix_acl *acl = ERR_PTR(-ENODATA);
+	struct posix_acl *acl = ACL_NOT_CACHED;
 
 	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
+	if (*i_acl != ACL_NOT_CACHED)
 		acl = posix_acl_dup(*i_acl);
 	spin_unlock(&inode->i_lock);
 
@@ -239,15 +239,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	}
 
 	acl = iget_acl(inode, p_acl);
-	if (acl && !IS_ERR(acl))
+	if (acl != ACL_NOT_CACHED)
 		return acl;
-	else if (PTR_ERR(acl) == -ENODATA)
-		return NULL;
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
-			*p_acl = ERR_PTR(-ENODATA);
+			*p_acl = NULL;
 			return NULL;
 		}
 		return ERR_PTR(size);
@@ -262,7 +260,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		/* This shouldn't actually happen as it should have
 		   been caught above.. but just in case */
 		acl = NULL;
-		*p_acl = ERR_PTR(-ENODATA);
+		*p_acl = acl;
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
 	} else {
@@ -379,11 +377,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 	}
 
 	acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
-	if (IS_ERR(acl)) {
-		if (PTR_ERR(acl) == -ENODATA)
-			goto apply_umask;
+	if (IS_ERR(acl))
 		return PTR_ERR(acl);
-	}
 
 	if (acl) {
 		struct posix_acl *acl_copy;
-- 
cgit v1.2.3


From 281eede0328c84a8f20e0e85b807d5b51c3de4f2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 21:07:04 -0400
Subject: switch reiserfs to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/inode.c     |  4 ----
 fs/reiserfs/super.c     | 24 ------------------------
 fs/reiserfs/xattr_acl.c | 10 ++++------
 3 files changed, 4 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45db..a14d6cd9eeda 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	/* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b194451fc04b..d3aeb061612b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,10 +529,6 @@ static void init_once(void *foo)
 
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	inode_init_once(&ei->vfs_inode);
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	ei->i_acl_access = ACL_NOT_CACHED;
-	ei->i_acl_default = ACL_NOT_CACHED;
-#endif
 }
 
 static int init_inodecache(void)
@@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
 	reiserfs_write_unlock(inode->i_sb);
 }
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-static void reiserfs_clear_inode(struct inode *inode)
-{
-	struct posix_acl *acl;
-
-	acl = REISERFS_I(inode)->i_acl_access;
-	if (acl && acl != ACL_NOT_CACHED)
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
-
-	acl = REISERFS_I(inode)->i_acl_default;
-	if (acl && acl != ACL_NOT_CACHED)
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
-}
-#else
-#define reiserfs_clear_inode NULL
-#endif
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = {
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
 	.delete_inode = reiserfs_delete_inode,
-	.clear_inode = reiserfs_clear_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
 	.sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 7b3aeb9327d3..b6e473faa8b8 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -223,16 +223,15 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	struct posix_acl *acl, **p_acl;
 	int size;
 	int retval;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
+		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
+		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return ERR_PTR(-EINVAL);
@@ -288,7 +287,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	struct posix_acl **p_acl;
 	size_t size = 0;
 	int error;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
@@ -296,7 +294,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
+		p_acl = &inode->i_acl;
 		if (acl) {
 			mode_t mode = inode->i_mode;
 			error = posix_acl_equiv_mode(acl, &mode);
@@ -311,7 +309,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
+		p_acl = &inode->i_default_acl;
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		break;
-- 
cgit v1.2.3


From 073aaa1b142461d91f83da66db1184d7c1b1edea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 9 Jun 2009 12:11:54 -0400
Subject: helpers for acl caching + switch to those

helpers: get_cached_acl(inode, type), set_cached_acl(inode, type, acl),
forget_cached_acl(inode, type).

ubifs/xattr.c needed includes reordered, the rest is a plain switchover.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/acl.c          | 44 ++++++--------------------
 fs/ext2/acl.c           | 79 ++++++++++------------------------------------
 fs/ext3/acl.c           | 83 +++++++++++--------------------------------------
 fs/ext4/acl.c           | 65 ++++++--------------------------------
 fs/jffs2/acl.c          | 60 +++++++----------------------------
 fs/jfs/acl.c            | 32 ++++++++-----------
 fs/jfs/xattr.c          | 10 ++----
 fs/reiserfs/xattr_acl.c | 49 ++++++-----------------------
 fs/ubifs/xattr.c        |  2 +-
 9 files changed, 91 insertions(+), 333 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6db8a42a3e5e..f128427b995b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
 
 #ifdef CONFIG_FS_POSIX_ACL
 
-static void btrfs_update_cached_acl(struct inode *inode,
-				    struct posix_acl **p_acl,
-				    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != ACL_NOT_CACHED)
-		posix_acl_release(*p_acl);
-	*p_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
 	char *value = NULL;
-	struct posix_acl *acl = NULL, **p_acl;
+	struct posix_acl *acl;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	/* Handle the cached NULL acl case without locking */
-	acl = ACCESS_ONCE(*p_acl);
-	if (!acl)
-		return acl;
-
-	spin_lock(&inode->i_lock);
-	acl = *p_acl;
-	if (acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
 	size = __btrfs_getxattr(inode, name, "", 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 		if (size > 0) {
 			acl = posix_acl_from_xattr(value, size);
-			btrfs_update_cached_acl(inode, p_acl, acl);
+			set_cached_acl(inode, type, acl);
 		}
 		kfree(value);
 	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
 		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 	} else {
 		acl = ERR_PTR(-EIO);
 	}
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int ret, size = 0;
 	const char *name;
-	struct posix_acl **p_acl;
 	char *value = NULL;
 	mode_t mode;
 
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 		ret = 0;
 		inode->i_mode = mode;
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EINVAL : 0;
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
@@ -172,7 +146,7 @@ out:
 	kfree(value);
 
 	if (!ret)
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return ret;
 }
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d2ffddc12117..d636e1297cad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,30 +125,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
-static inline void
-ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		   struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * inode->i_mutex: don't care
  */
@@ -163,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext2_iget_acl(inode, &inode->i_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext2_iget_acl(inode, &inode->i_default_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
 	retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -196,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &inode->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -261,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	error = ext2_xattr_set(inode, name_index, "", value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &inode->i_acl, acl);
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
 	return error;
 }
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a9707689d9e1..e167bae37ef0 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -169,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &inode->i_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &inode->i_default_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
+
 	retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
 		value = kmalloc(retval, GFP_NOFS);
@@ -202,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &inode->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -269,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &inode->i_acl, acl);
-				break;
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
+
 	return error;
 }
 
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0084e3a19d86..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -169,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = ext4_iget_acl(inode, &inode->i_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 		break;
-
 	case ACL_TYPE_DEFAULT:
-		acl = ext4_iget_acl(inode, &inode->i_default_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 		break;
-
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -202,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &inode->i_acl, acl);
-			break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
 	return acl;
 }
 
@@ -269,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &inode->i_acl, acl);
-			break;
+	if (!error)
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
 	return error;
 }
 
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index ac16589ebbd1..edd2ad6416d8 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,47 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 	return ERR_PTR(-EINVAL);
 }
 
-static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-	return acl;
-}
-
-static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *value = NULL;
 	int rc, xprefix;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = jffs2_iget_acl(inode, &inode->i_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
 		break;
 	case ACL_TYPE_DEFAULT:
-		acl = jffs2_iget_acl(inode, &inode->i_default_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
 	if (rc > 0) {
@@ -214,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 	}
 	if (value)
 		kfree(value);
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &inode->i_acl, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 	return acl;
 }
 
@@ -283,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		return -EINVAL;
 	}
 	rc = __jffs2_set_acl(inode, xprefix, acl);
-	if (!rc) {
-		switch(type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &inode->i_acl, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
 	return rc;
 }
 
@@ -336,7 +298,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 		*i_mode &= ~current_umask();
 	} else {
 		if (S_ISDIR(*i_mode))
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
+			set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
 
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		if (!clone)
@@ -347,7 +309,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 			return rc;
 		}
 		if (rc > 0)
-			jffs2_iset_acl(inode, &inode->i_acl, clone);
+			set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 
 		posix_acl_release(clone);
 	}
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 5fcfc9857c11..f272bf032e1e 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,26 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *ea_name;
-	struct posix_acl **p_acl;
 	int size;
 	char *value = NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &inode->i_default_acl;
 			break;
 		default:
 			return ERR_PTR(-EINVAL);
 	}
 
-	if (*p_acl != ACL_NOT_CACHED)
-		return posix_acl_dup(*p_acl);
-
 	size = __jfs_getxattr(inode, ea_name, NULL, 0);
 
 	if (size > 0) {
@@ -61,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	}
 
 	if (size < 0) {
-		if (size == -ENODATA) {
-			*p_acl = NULL;
+		if (size == -ENODATA)
 			acl = NULL;
-		} else
+		else
 			acl = ERR_PTR(size);
 	} else {
 		acl = posix_acl_from_xattr(value, size);
-		if (!IS_ERR(acl))
-			*p_acl = posix_acl_dup(acl);
 	}
 	kfree(value);
+	if (!IS_ERR(acl)) {
+		set_cached_acl(inode, type, acl);
+		posix_acl_release(acl);
+	}
 	return acl;
 }
 
@@ -79,7 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 		       struct posix_acl *acl)
 {
 	char *ea_name;
-	struct posix_acl **p_acl;
 	int rc;
 	int size = 0;
 	char *value = NULL;
@@ -90,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &inode->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
 			break;
@@ -114,11 +110,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 out:
 	kfree(value);
 
-	if (!rc) {
-		if (*p_acl && (*p_acl != ACL_NOT_CACHED))
-			posix_acl_release(*p_acl);
-		*p_acl = posix_acl_dup(acl);
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
+
 	return rc;
 }
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index f6e90e343593..fad364548bc9 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the ACL.  Get rid of the cached one
 		 */
-		acl =inode->i_acl;
-		if (acl != ACL_NOT_CACHED)
-			posix_acl_release(acl);
-		inode->i_acl = ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
 		return 0;
 	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the default ACL.  Get rid of the cached one
 		 */
-		acl = inode->i_default_acl;
-		if (acl && (acl != ACL_NOT_CACHED))
-			posix_acl_release(acl);
-		inode->i_default_acl = ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
 		return 0;
 	}
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index b6e473faa8b8..35d6e672a279 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
-			    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
-static inline struct posix_acl *iget_acl(struct inode *inode,
-					 struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -220,31 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
 	char *name, *value;
-	struct posix_acl *acl, **p_acl;
+	struct posix_acl *acl;
 	int size;
 	int retval;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	acl = iget_acl(inode, p_acl);
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
-			*p_acl = NULL;
+			set_cached_acl(inode, type, NULL);
 			return NULL;
 		}
 		return ERR_PTR(size);
@@ -259,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		/* This shouldn't actually happen as it should have
 		   been caught above.. but just in case */
 		acl = NULL;
-		*p_acl = acl;
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
 	} else {
 		acl = posix_acl_from_disk(value, retval);
-		if (!IS_ERR(acl))
-			iset_acl(inode, p_acl, acl);
 	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
 	kfree(value);
 	return acl;
@@ -284,7 +258,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 {
 	char *name;
 	void *value = NULL;
-	struct posix_acl **p_acl;
 	size_t size = 0;
 	int error;
 
@@ -294,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		if (acl) {
 			mode_t mode = inode->i_mode;
 			error = posix_acl_equiv_mode(acl, &mode);
@@ -309,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		break;
@@ -342,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	kfree(value);
 
 	if (!error)
-		iset_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return error;
 }
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c89..adafcf556531 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
  * ACL support is not implemented.
  */
 
+#include "ubifs.h"
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
 
 /*
  * Limit the number of extended attributes per inode so that the total size
-- 
cgit v1.2.3


From 1cbd20d820c36f52543e3e4cd0067ebf52aa388f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 9 Jun 2009 13:29:39 -0400
Subject: switch xfs to generic acl caching helpers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/xfs/linux-2.6/xfs_acl.c | 73 ++++++----------------------------------------
 fs/xfs/xfs_acl.h           |  4 ---
 fs/xfs/xfs_iget.c          |  2 --
 fs/xfs/xfs_inode.h         |  5 ----
 4 files changed, 9 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 1e9d1246eebc..b23a54506446 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -25,14 +25,10 @@
 #include <linux/posix_acl_xattr.h>
 
 
-#define XFS_ACL_NOT_CACHED	((void *)-1)
-
 /*
  * Locking scheme:
  *  - all ACL updates are protected by inode->i_mutex, which is taken before
  *    calling into this file.
- *  - access and updates to the ip->i_acl and ip->i_default_acl pointers are
- *    protected by inode->i_lock.
  */
 
 STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
 	}
 }
 
-/*
- * Update the cached ACL pointer in the inode.
- *
- * Because we don't hold any locks while reading/writing the attribute
- * from/to disk another thread could have raced and updated the cached
- * ACL value before us. In that case we release the previous cached value
- * and update it with our new value.
- */
-STATIC void
-xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
-		struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(*p_acl);
-	*p_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 struct posix_acl *
 xfs_get_acl(struct inode *inode, int type)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	struct posix_acl *acl = NULL, **p_acl;
+	struct posix_acl *acl;
 	struct xfs_acl *xfs_acl;
 	int len = sizeof(struct xfs_acl);
 	char *ea_name;
 	int error;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		ea_name = SGI_ACL_FILE;
-		p_acl = &ip->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		ea_name = SGI_ACL_DEFAULT;
-		p_acl = &ip->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	spin_lock(&inode->i_lock);
-	if (*p_acl != XFS_ACL_NOT_CACHED)
-		acl = posix_acl_dup(*p_acl);
-	spin_unlock(&inode->i_lock);
-
 	/*
 	 * If we have a cached ACLs value just return it, not need to
 	 * go out to the disk.
 	 */
-	if (acl)
-		return acl;
 
 	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
 	if (!xfs_acl)
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
 		/*
 		 * If the attribute doesn't exist make sure we have a negative
 		 * cache entry, for any other error assume it is transient and
-		 * leave the cache entry as XFS_ACL_NOT_CACHED.
+		 * leave the cache entry as ACL_NOT_CACHED.
 		 */
 		if (error == -ENOATTR) {
 			acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
 		goto out;
 
  out_update_cache:
-	xfs_update_cached_acl(inode, p_acl, acl);
+	set_cached_acl(inode, type, acl);
  out:
 	kfree(xfs_acl);
 	return acl;
@@ -189,7 +161,6 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	struct posix_acl **p_acl;
 	char *ea_name;
 	int error;
 
@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		ea_name = SGI_ACL_FILE;
-		p_acl = &ip->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		ea_name = SGI_ACL_DEFAULT;
-		p_acl = &ip->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	}
 
 	if (!error)
-		xfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 	return error;
 }
 
@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-void
-xfs_inode_init_acls(struct xfs_inode *ip)
-{
-	/*
-	 * No need for locking, inode is not live yet.
-	 */
-	ip->i_acl = XFS_ACL_NOT_CACHED;
-	ip->i_default_acl = XFS_ACL_NOT_CACHED;
-}
-
-void
-xfs_inode_clear_acls(struct xfs_inode *ip)
-{
-	/*
-	 * No need for locking here, the inode is not live anymore
-	 * and just about to be freed.
-	 */
-	if (ip->i_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(ip->i_acl);
-	if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(ip->i_default_acl);
-}
-
-
 /*
  * System xattr handlers.
  *
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 63dc1f2efad5..947b150df8ed 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
 extern int xfs_acl_chmod(struct inode *inode);
-extern void xfs_inode_init_acls(struct xfs_inode *ip);
-extern void xfs_inode_clear_acls(struct xfs_inode *ip);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);
 
@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
 # define xfs_get_acl(inode, type)			NULL
 # define xfs_inherit_acl(inode, default_acl)		0
 # define xfs_acl_chmod(inode)				0
-# define xfs_inode_init_acls(ip)
-# define xfs_inode_clear_acls(ip)
 # define posix_acl_access_exists(inode)			0
 # define posix_acl_default_exists(inode)		0
 #endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..5fcec6f020a7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -83,7 +83,6 @@ xfs_inode_alloc(
 	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 	ip->i_size = 0;
 	ip->i_new_size = 0;
-	xfs_inode_init_acls(ip);
 
 	/*
 	 * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
-	xfs_inode_clear_acls(ip);
 	kmem_zone_free(xfs_inode_zone, ip);
 }
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 77016702938b..1804f866a71d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
 	/* VFS inode */
 	struct inode		i_vnode;	/* embedded VFS inode */
 
-#ifdef CONFIG_XFS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
 	/* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */
-- 
cgit v1.2.3


From 72c04902d1e27c8a324014cff1d4475c11b1cecd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 24 Jun 2009 16:58:48 -0400
Subject: Get "no acls for this inode" right, fix shmem breakage

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c | 6 ++----
 fs/jffs2/acl.c   | 3 +--
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 78ad38ddd01f..dbe1aabf96cd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	 * any xattrs or acls
 	 */
 	maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
-	if (!maybe_acls) {
-		inode->i_acl = NULL;
-		inode->i_default_acl = NULL;
-	}
+	if (!maybe_acls)
+		cache_no_acl(inode);
 
 	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
 						alloc_group_block, 0);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index edd2ad6416d8..8fcb6239218e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -284,8 +284,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 	struct posix_acl *acl, *clone;
 	int rc;
 
-	inode->i_default_acl = NULL;
-	inode->i_acl = NULL;
+	cache_no_acl(inode);
 
 	if (S_ISLNK(*i_mode))
 		return 0;	/* Symlink always has no-ACL */
-- 
cgit v1.2.3


From d5bb68adda7cc179e8efadeaa3a283cb470f13a6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 24 Jun 2009 17:02:42 -0400
Subject: another race fix in jfs_check_acl()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jfs/acl.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index f272bf032e1e..91fa3ad6e8c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -118,15 +118,16 @@ out:
 
 static int jfs_check_acl(struct inode *inode, int mask)
 {
-	if (inode->i_acl == ACL_NOT_CACHED) {
-		struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
+	struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
 		posix_acl_release(acl);
+		return error;
 	}
 
-	if (inode->i_acl)
-		return posix_acl_permission(inode, inode->i_acl, mask);
 	return -EAGAIN;
 }
 
-- 
cgit v1.2.3


From 268875b9d1dd1bf0b523c59e736da9bc20c8ce1f Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 25 Jun 2009 00:29:21 +0000
Subject: [CIFS] Do not send tree disconnect if session is already disconnected

Noticed this when tree connect timed out (due to Samba server crash) -
we try to send a tree disconnect for a tid that does not exist
since we don't have a valid tree id yet. This checks that the
session is valid before sending the tree disconnect to handle
this case.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES   | 2 +-
 fs/cifs/cifssmb.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index b48689839428..3a9b7a58a51d 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,7 @@ client generated ones by default (mount option "serverino" turned
 on by default if server supports it).  Add forceuid and forcegid
 mount options (so that when negotiating unix extensions specifying
 which uid mounted does not immediately force the server's reported
-uids to be overridden).
+uids to be overridden).  Add support for scope moutn parm.
 
 Version 1.58
 ------------
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b84c61d5bca4..58d65a9a79c3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
 	 * the tcon is no longer on the list, so no need to take lock before
 	 * checking this.
 	 */
-	if (tcon->need_reconnect)
+	if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
 		return 0;
 
 	rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
-- 
cgit v1.2.3


From 681bf72e4893a187cf6b6b62c08fc193f81c8c2f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 11 Jun 2009 10:27:31 -0400
Subject: cifs: have cifs parse scope_id out of IPv6 addresses and use it

This patch has CIFS look for a '%' in an IPv6 address. If one is
present then it will try to treat that value as a numeric interface
index suitable for stuffing into the sin6_scope_id field.

This should allow people to mount servers on IPv6 link-local addresses.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: David Holder <david@erion.co.uk>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c     |  6 ++++--
 fs/cifs/dns_resolve.c |  4 ++--
 fs/cifs/netmisc.c     | 32 +++++++++++++++++++++++++++-----
 3 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c368ad658236..3fb799ff55c9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1386,8 +1386,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
 		     server->addr.sockAddr.sin_addr.s_addr))
 			continue;
 		else if (addr->ss_family == AF_INET6 &&
-			 !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
-					  &addr6->sin6_addr))
+			 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
+					   &addr6->sin6_addr) ||
+			  server->addr.sockAddr6.sin6_scope_id !=
+					   addr6->sin6_scope_id))
 			continue;
 
 		++server->srv_count;
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 91b5500755bf..87948147d7ec 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -35,7 +35,7 @@
  * 		0 - name is not IP
  */
 static int
-is_ip(const char *name)
+is_ip(char *name)
 {
 	struct sockaddr_storage ss;
 
@@ -57,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
 	ip[datalen] = '\0';
 
 	/* make sure this looks like an address */
-	if (!is_ip((const char *) ip)) {
+	if (!is_ip(ip)) {
 		kfree(ip);
 		return -EINVAL;
 	}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f9a54da97d34..bd6d6895730d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -158,25 +158,47 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
 /*
  * Try to convert a string to an IPv4 address and then attempt to convert
  * it to an IPv6 address if that fails. Set the family field if either
- * succeeds.
+ * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to
+ * treat the part following it as a numeric sin6_scope_id.
  *
  * Returns 0 on failure.
  */
 int
 cifs_convert_address(char *src, void *dst)
 {
+	int rc;
+	char *pct, *endp;
 	struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
 	struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
 
+	/* IPv4 address */
 	if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
 		s4->sin_family = AF_INET;
 		return 1;
-	} else if (cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr)) {
-		s6->sin6_family = AF_INET6;
-		return 1;
 	}
 
-	return 0;
+	/* temporarily terminate string */
+	pct = strchr(src, '%');
+	if (pct)
+		*pct = '\0';
+
+	rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
+
+	/* repair temp termination (if any) and make pct point to scopeid */
+	if (pct)
+		*pct++ = '%';
+
+	if (!rc)
+		return rc;
+
+	s6->sin6_family = AF_INET6;
+	if (pct) {
+		s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
+		if (!*pct || *endp)
+			return 0;
+	}
+
+	return rc;
 }
 
 /*****************************************************************************
-- 
cgit v1.2.3


From b48a485884b5afb3e33b1871bcbd246b67491923 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 25 Jun 2009 00:56:54 -0400
Subject: cifs: fix problems with earlier patches

cifs: fix problems with earlier patches

cifs_show_address hasn't been introduced yet, and fix a typo that was
silently fixed by a later patch in the series.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ddef913ff3e6..b5e9f398c2e5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -374,8 +374,6 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 	if (tcon->ses->domainName)
 		seq_printf(s, ",domain=%s", tcon->ses->domainName);
 
-	cifs_show_address(s, tcon->ses->server);
-
 	seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
 		seq_printf(s, ",forceuid");
-- 
cgit v1.2.3


From 6459340cfcc6f6d165b27c3dd955aeb55a1b73d3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 25 Jun 2009 00:56:55 -0400
Subject: cifs: remove rw/ro options

cifs: remove rw/ro options

These options are handled at the VFS layer. They only ever set the
option in the smb_vol struct. Nothing was ever done with them afterward
anyway.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3fb799ff55c9..a581cfa2ba82 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -70,7 +70,6 @@ struct smb_vol {
 	mode_t file_mode;
 	mode_t dir_mode;
 	unsigned secFlg;
-	bool rw:1;
 	bool retry:1;
 	bool intr:1;
 	bool setuids:1;
@@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 	vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
 
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
-	vol->rw = true;
 	/* default is always to request posix paths. */
 	vol->posix_paths = 1;
 	/* default to using server inode numbers where available */
@@ -1198,8 +1196,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 			/* ignore */
 		} else if (strnicmp(data, "guest", 5) == 0) {
 			/* ignore */
-		} else if (strnicmp(data, "rw", 2) == 0) {
-			vol->rw = true;
 		} else if (strnicmp(data, "noblocksend", 11) == 0) {
 			vol->noblocksnd = 1;
 		} else if (strnicmp(data, "noautotune", 10) == 0) {
@@ -1218,8 +1214,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 			    parse these options again and set anything and it
 			    is ok to just ignore them */
 			continue;
-		} else if (strnicmp(data, "ro", 2) == 0) {
-			vol->rw = false;
 		} else if (strnicmp(data, "hard", 4) == 0) {
 			vol->retry = 1;
 		} else if (strnicmp(data, "soft", 4) == 0) {
-- 
cgit v1.2.3


From 6debdbc0ba6253ac519cd5a3d22e30f1f9f1dd12 Mon Sep 17 00:00:00 2001
From: Simo Leone <simo@archlinux.org>
Date: Thu, 25 Jun 2009 02:44:43 +0000
Subject: [CIFS] Copy struct *after* setting the port, instead of before.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Simo Leone <simo@archlinux.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a581cfa2ba82..12c2cf693555 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1496,14 +1496,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		cFYI(1, ("attempting ipv6 connect"));
 		/* BB should we allow ipv6 on port 139? */
 		/* other OS never observed in Wild doing 139 with v6 */
+		sin_server6->sin6_port = htons(volume_info->port);
 		memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
 			sizeof(struct sockaddr_in6));
-		sin_server6->sin6_port = htons(volume_info->port);
 		rc = ipv6_connect(tcp_ses);
 	} else {
+		sin_server->sin_port = htons(volume_info->port);
 		memcpy(&tcp_ses->addr.sockAddr, sin_server,
 			sizeof(struct sockaddr_in));
-		sin_server->sin_port = htons(volume_info->port);
 		rc = ipv4_connect(tcp_ses);
 	}
 	if (rc < 0) {
-- 
cgit v1.2.3


From f46c7234e472ceee39afea4fb5a4365843e1850a Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 25 Jun 2009 03:04:20 +0000
Subject: [CIFS] cleanup asn handling for ntlmssp

Also removes obsolete distinction between rawntlmssp and ntlmssp (in asn/SPNEGO)
since as jra noted we can always send raw ntlmssp in session setup now.

remove check for experimental runtime flag (/proc/fs/cifs/Experimental) in
ntlmssp path.

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/asn1.c     | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/cifs/cifsglob.h |  2 +-
 fs/cifs/cifssmb.c  |  2 +-
 fs/cifs/sess.c     |  2 +-
 4 files changed, 52 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 1b09f1670061..20692fbfdb24 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -49,6 +49,7 @@
 #define ASN1_OJI	6	/* Object Identifier  */
 #define ASN1_OJD	7	/* Object Description */
 #define ASN1_EXT	8	/* External */
+#define ASN1_ENUM	10	/* Enumerated */
 #define ASN1_SEQ	16	/* Sequence */
 #define ASN1_SET	17	/* Set */
 #define ASN1_NUMSTR	18	/* Numerical String */
@@ -78,10 +79,12 @@
 #define SPNEGO_OID_LEN 7
 #define NTLMSSP_OID_LEN  10
 #define KRB5_OID_LEN  7
+#define KRB5U2U_OID_LEN  8
 #define MSKRB5_OID_LEN  7
 static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
 static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
 static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
+static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
 static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
 
 /*
@@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
 	return 1;
 }
 
+#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */
+static unsigned char
+asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val)
+{
+	unsigned char ch;
+
+	if (ctx->pointer >= ctx->end) {
+		ctx->error = ASN1_ERR_DEC_EMPTY;
+		return 0;
+	}
+
+	ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */
+	if ((ch) == ASN1_ENUM)  /* if ch value is ENUM, 0xa */
+		*val = *(++(ctx->pointer)); /* value has enum value */
+	else
+		return 0;
+
+	ctx->pointer++;
+	return 1;
+}
+#endif
+
 static unsigned char
 asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
 {
@@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	unsigned int cls, con, tag, oidlen, rc;
 	bool use_ntlmssp = false;
 	bool use_kerberos = false;
+	bool use_kerberosu2u = false;
 	bool use_mskerberos = false;
 
-	*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
-
 	/* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
 
 	asn1_open(&ctx, security_blob, length);
@@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* SPNEGO */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding negTokenInit"));
 		return 0;
@@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* negTokenInit */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding negTokenInit"));
 		return 0;
@@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* sequence */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
 		return 0;
@@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* sequence of */
 	if (asn1_header_decode
 	    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
@@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* list of security mechanisms */
 	while (!asn1_eoc_decode(&ctx, sequence_end)) {
 		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
 		if (!rc) {
@@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
 						MSKRB5_OID_LEN) &&
-						!use_kerberos)
+						!use_mskerberos)
 					use_mskerberos = true;
+				else if (compare_oid(oid, oidlen, KRB5U2U_OID,
+						     KRB5U2U_OID_LEN) &&
+						     !use_kerberosu2u)
+					use_kerberosu2u = true;
 				else if (compare_oid(oid, oidlen, KRB5_OID,
 						     KRB5_OID_LEN) &&
-						     !use_mskerberos)
+						     !use_kerberos)
 					use_kerberos = true;
 				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 						     NTLMSSP_OID_LEN))
@@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		}
 	}
 
+	/* mechlistMIC */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		/* Check if we have reached the end of the blob, but with
+		   no mechListMic (e.g. NTLMSSP instead of KRB5) */
+		if (ctx.error == ASN1_ERR_DEC_EMPTY)
+			goto decode_negtoken_exit;
 		cFYI(1, ("Error decoding last part negTokenInit exit3"));
 		return 0;
 	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
@@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			 cls, con, tag, end, *end));
 		return 0;
 	}
+
+	/* sequence */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit5"));
 		return 0;
@@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			cls, con, tag, end, *end));
 	}
 
+	/* sequence of */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit 7"));
 		return 0;
@@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			 cls, con, tag, end, *end));
 		return 0;
 	}
+
+	/* general string */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit9"));
 		return 0;
@@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	}
 	cFYI(1, ("Need to call asn1_octets_decode() function for %s",
 		 ctx.pointer));	/* is this UTF-8 or ASCII? */
-
+decode_negtoken_exit:
 	if (use_kerberos)
 		*secType = Kerberos;
 	else if (use_mskerberos)
 		*secType = MSKerberos;
 	else if (use_ntlmssp)
-		*secType = NTLMSSP;
+		*secType = RawNTLMSSP;
 
 	return 1;
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a61ab772c6f6..e1225e6ded2f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -83,7 +83,7 @@ enum securityEnum {
 	NTLM,			/* Legacy NTLM012 auth with NTLM hash */
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO, NTLMv2 hash */
-	NTLMSSP,		/* NTLMSSP via SPNEGO, NTLMv2 hash */
+/*	NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
 	Kerberos,		/* Kerberos via SPNEGO */
 	MSKerberos,		/* MS Kerberos via SPNEGO */
 };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 58d65a9a79c3..61007c627497 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	else if (secFlags & CIFSSEC_MAY_KRB5)
 		server->secType = Kerberos;
 	else if (secFlags & CIFSSEC_MAY_NTLMSSP)
-		server->secType = NTLMSSP;
+		server->secType = RawNTLMSSP;
 	else if (secFlags & CIFSSEC_MAY_LANMAN)
 		server->secType = LANMAN;
 /* #ifdef CONFIG_CIFS_EXPERIMENTAL
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 897a052270f9..7085a6275c4c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate:
 #endif /* CONFIG_CIFS_UPCALL */
 	} else {
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-		if ((experimEnabled > 1) && (type == RawNTLMSSP)) {
+		if (type == RawNTLMSSP) {
 			if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
 				cERROR(1, ("NTLMSSP requires Unicode support"));
 				rc = -ENOSYS;
-- 
cgit v1.2.3


From 0f3bc09ee1b7fcadd5bfdc5ed2e1643f658fe23d Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Thu, 25 Jun 2009 18:12:34 +0530
Subject: cifs: Fix incorrect return code being printed in cFYI messages

FreeXid() along with freeing Xid does add a cifsFYI debug message that
prints rc (return code) as well. In some code paths where we set/return
error code after calling FreeXid(), incorrect error code is being
printed when cifsFYI is enabled.

This could be misleading in few cases. For eg.
In cifs_open() if cifs_fill_filedata() returns a valid pointer to
cifsFileInfo, FreeXid() prints rc=-13 whereas 0 is actually being
returned. Fix this by setting rc before calling FreeXid().

Basically convert

FreeXid(xid);			rc = -ERR;
return -ERR;		=>	FreeXid(xid);
				return rc;

[Note that Christoph would like to replace the GetXid/FreeXid
calls, which are primarily used for debugging.  This seems
like a good longer term goal, but although there is an
alternative tracing facility, there are no examples yet
available that I know of that we can use (yet) to
convert this cifs function entry/exit logging, and for
creating an identifier that we can use to correlate
all dmesg log entries for a particular vfs operation
(ie identify all log entries for a particular vfs
request to cifs: e.g. a particular close or read or write
or byte range lock call ... and just using the thread id
is harder).  Eventually when a replacement
for this is available (e.g. when NFS switches over and various
samples to look at in other file systems) we can remove the
GetXid/FreeXid macro but in the meantime multiple people
use this run time configurable logging all the time
for debugging, and Suresh's patch fixes a problem
which made it harder to notice some low
memory problems in the log so it is worthwhile
to fix this problem until a better logging
approach is able to be used]

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/dir.c   |  6 ++++--
 fs/cifs/file.c  | 24 ++++++++++++++++--------
 fs/cifs/inode.c | 15 ++++++++++-----
 fs/cifs/link.c  |  3 ++-
 fs/cifs/xattr.c | 12 ++++++++----
 5 files changed, 40 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3758965d73d5..7dc6b74f9def 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if (oplockEnabled)
@@ -540,8 +541,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 			buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 			if (buf == NULL) {
 				kfree(full_path);
+				rc = -ENOMEM;
 				FreeXid(xid);
-				return -ENOMEM;
+				return rc;
 			}
 
 			rc = CIFSSMBOpen(xid, pTcon, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06866841b97f..ebdbe62a829c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file)
 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
 	pCifsFile = cifs_fill_filedata(file);
 	if (pCifsFile) {
+		rc = 0;
 		FreeXid(xid);
-		return 0;
+		return rc;
 	}
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
@@ -494,8 +496,9 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 	mutex_unlock(&pCifsFile->fh_mutex);
 	if (!pCifsFile->invalidHandle) {
 		mutex_lock(&pCifsFile->fh_mutex);
+		rc = 0;
 		FreeXid(xid);
-		return 0;
+		return rc;
 	}
 
 	if (file->f_path.dentry == NULL) {
@@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 	tcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
 
@@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 
@@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 
@@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 
 	xid = GetXid();
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page)
 	xid = GetXid();
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 
 	cFYI(1, ("readpage %p at offset %d 0x%x\n",
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fad882b075ba..155c9e785d0c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -988,8 +988,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 	 * sb->s_vfs_rename_mutex here */
 	full_path = build_path_from_dentry(dentry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -1118,8 +1119,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if ((pTcon->ses->capabilities & CAP_UNIX) &&
@@ -1303,8 +1305,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls,
@@ -1508,8 +1511,9 @@ int cifs_revalidate(struct dentry *direntry)
 	   since that would deadlock */
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
 		 "jiffies %ld", full_path, direntry->d_inode,
@@ -1911,8 +1915,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	/*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cd83c53fcbb5..fc1e0487eaee 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 	full_path = build_path_from_dentry(direntry);
 
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	cFYI(1, ("Full path: %s", full_path));
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index e9527eedc639..a75afa3dd9e1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	if (ea_name == NULL) {
 		cFYI(1, ("Null xattr names not supported"));
@@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
@@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
@@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
-- 
cgit v1.2.3


From ad8034f19792736db5c259103c2eaaf72887bbb4 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 26 Jun 2009 03:25:49 +0000
Subject: [CIFS] remove bkl usage from umount begin

The lock_kernel call moved into the fs for umount_begin
is not needed.  This adds a check to make sure we don't
call umount_begin twice on the same fs.

umount_begin for cifs is probably not needed and
may eventually be able to be removed, but in
the meantime this smaller patch is safe and
gets rid of the bkl from this path which provides
some benefit.

Acked-by: Jeff Layton <redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b5e9f398c2e5..9f669f982c4d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -537,9 +537,14 @@ static void cifs_umount_begin(struct super_block *sb)
 	if (tcon == NULL)
 		return;
 
-	lock_kernel();
 	read_lock(&cifs_tcp_ses_lock);
-	if (tcon->tc_count == 1)
+	if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) {
+		/* we have other mounts to same share or we have
+		   already tried to force umount this and woken up
+		   all waiting network requests, nothing to do */
+		read_unlock(&cifs_tcp_ses_lock);
+		return;
+	} else if (tcon->tc_count == 1)
 		tcon->tidStatus = CifsExiting;
 	read_unlock(&cifs_tcp_ses_lock);
 
@@ -554,9 +559,7 @@ static void cifs_umount_begin(struct super_block *sb)
 		wake_up_all(&tcon->ses->server->response_q);
 		msleep(1);
 	}
-/* BB FIXME - finish add checks for tidStatus BB */
 
-	unlock_kernel();
 	return;
 }
 
-- 
cgit v1.2.3


From 71a394faaad07090af5de5c075ec2f5bca0fbb35 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 26 Jun 2009 04:07:18 +0000
Subject: [CIFS] remove unknown mount option warning message

Jeff's previous patch which removed the unneeded rw/ro
parsing can cause a minor warning in dmesg (about the
unknown rw or ro mount option) at mount time. This
patch makes cifs ignore them in kernel to remove the warning
(they are already handled in the mount helper and VFS).

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 12c2cf693555..e16d7592116a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1196,6 +1196,10 @@ cifs_parse_mount_options(char *options, const char *devname,
 			/* ignore */
 		} else if (strnicmp(data, "guest", 5) == 0) {
 			/* ignore */
+		} else if (strnicmp(data, "rw", 2) == 0) {
+			/* ignore */
+		} else if (strnicmp(data, "ro", 2) == 0) {
+			/* ignore */
 		} else if (strnicmp(data, "noblocksend", 11) == 0) {
 			vol->noblocksnd = 1;
 		} else if (strnicmp(data, "noautotune", 10) == 0) {
-- 
cgit v1.2.3


From f0a71eb820596bd8f6abf64beb4cb181edaa2341 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 27 Jun 2009 07:04:55 -0400
Subject: cifs: fix fh_mutex locking in cifs_reopen_file

Fixes a regression caused by commit a6ce4932fbdbcd8f8e8c6df76812014351c32892

When this lock was converted to a mutex, the locks were turned into
unlocks and vice-versa.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Cc: Stable Tree <stable@kernel.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ebdbe62a829c..97ce4bf89d15 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -493,9 +493,9 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 		return -EBADF;
 
 	xid = GetXid();
-	mutex_unlock(&pCifsFile->fh_mutex);
+	mutex_lock(&pCifsFile->fh_mutex);
 	if (!pCifsFile->invalidHandle) {
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		rc = 0;
 		FreeXid(xid);
 		return rc;
@@ -527,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 	if (full_path == NULL) {
 		rc = -ENOMEM;
 reopen_error_exit:
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		FreeXid(xid);
 		return rc;
 	}
@@ -569,14 +569,14 @@ reopen_error_exit:
 			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc) {
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		cFYI(1, ("cifs_open returned 0x%x", rc));
 		cFYI(1, ("oplock: %d", oplock));
 	} else {
 reopen_success:
 		pCifsFile->netfid = netfid;
 		pCifsFile->invalidHandle = false;
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		pCifsInode = CIFS_I(inode);
 		if (pCifsInode) {
 			if (can_flush) {
-- 
cgit v1.2.3


From 94e5d714f604d4cb4cb13163f01ede278e69258b Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 26 Jun 2009 14:05:27 -0400
Subject: integrity: add ima_counts_put (updated)

This patch fixes an imbalance message as reported by J.R. Okajima.
The IMA file counters are incremented in ima_path_check. If the
actual open fails, such as ETXTBSY, decrement the counters to
prevent unnecessary imbalance messages.

Reported-by: J.R. Okajima <hooanon05@yahoo.co.jp>
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namei.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 5b961eb71cbf..f3c5b278895a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1761,6 +1761,10 @@ do_last:
 			goto exit;
 		}
 		filp = nameidata_to_filp(&nd, open_flag);
+		if (IS_ERR(filp))
+			ima_counts_put(&nd.path,
+				       acc_mode & (MAY_READ | MAY_WRITE |
+						   MAY_EXEC));
 		mnt_drop_write(nd.path.mnt);
 		if (nd.root.mnt)
 			path_put(&nd.root);
@@ -1817,6 +1821,9 @@ ok:
 		goto exit;
 	}
 	filp = nameidata_to_filp(&nd, open_flag);
+	if (IS_ERR(filp))
+		ima_counts_put(&nd.path,
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
 	/*
 	 * It is now safe to drop the mnt write
 	 * because the filp has had a write taken
-- 
cgit v1.2.3


From b4c458b3a23d76936e76678f2074b1528f129f7a Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@gluster.com>
Date: Mon, 29 Jun 2009 03:26:53 +0200
Subject: fuse: fix return value of fuse_dev_write()

On 64 bit systems -- where sizeof(ssize_t) > sizeof(int) -- the following test
exposes a bug due to a non-careful return of an int or unsigned value:

implement a FUSE filesystem which sends an unsolicited notification to
the kernel with invalid opcode. The respective write to /dev/fuse
will return (1 << 32) - EINVAL with errno == 0 instead of -1 with
errno == EINVAL.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
---
 fs/fuse/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8fed2ed12f38..8a11a8c67c42 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -910,7 +910,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
 	int err;
-	unsigned nbytes = iov_length(iov, nr_segs);
+	size_t nbytes = iov_length(iov, nr_segs);
 	struct fuse_req *req;
 	struct fuse_out_header oh;
 	struct fuse_copy_state cs;
-- 
cgit v1.2.3


From 201fa69a2849536ef2912e8e971ec0b01c04eff4 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 30 Jun 2009 20:06:24 +0200
Subject: fuse: fix bad return value in fuse_file_poll()

Fix fuse_file_poll() which returned a -errno value instead of a poll
mask.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
---
 fs/fuse/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fce6ce694fde..cbc464043b6f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
-		return PTR_ERR(req);
+		return POLLERR;
 
 	req->in.h.opcode = FUSE_POLL;
 	req->in.h.nodeid = ff->nodeid;
-- 
cgit v1.2.3


From e0a43ddcc08c34dbd666d93600fd23914505f4aa Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 30 Jun 2009 20:12:23 +0200
Subject: fuse: allow umask processing in userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch lets filesystems handle masking the file mode on creation.
This is needed if filesystem is using ACLs.

 - The CREATE, MKDIR and MKNOD requests are extended with a "umask"
   parameter.

 - A new FUSE_DONT_MASK flag is added to the INIT request/reply.  With
   this the filesystem may request that the create mode is not masked.

CC: Jean-Pierre André <jean-pierre.andre@wanadoo.fr>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c    | 20 +++++++++++++++++---
 fs/fuse/fuse_i.h |  3 +++
 fs/fuse/inode.c  |  9 ++++++++-
 3 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b3089a083d30..6b700734e519 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req;
 	struct fuse_req *forget_req;
-	struct fuse_open_in inarg;
+	struct fuse_create_in inarg;
 	struct fuse_open_out outopen;
 	struct fuse_entry_out outentry;
 	struct fuse_file *ff;
@@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	if (!ff)
 		goto out_put_request;
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	flags &= ~O_NOCTTY;
 	memset(&inarg, 0, sizeof(inarg));
 	memset(&outentry, 0, sizeof(outentry));
 	inarg.flags = flags;
 	inarg.mode = mode;
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_CREATE;
 	req->in.h.nodeid = get_node_id(dir);
 	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+						sizeof(inarg);
 	req->in.args[0].value = &inarg;
 	req->in.args[1].size = entry->d_name.len + 1;
 	req->in.args[1].value = entry->d_name.name;
@@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
 	inarg.rdev = new_encode_dev(rdev);
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_MKNOD;
 	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+						sizeof(inarg);
 	req->in.args[0].value = &inarg;
 	req->in.args[1].size = entry->d_name.len + 1;
 	req->in.args[1].value = entry->d_name.name;
@@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_MKDIR;
 	req->in.numargs = 2;
 	req->in.args[0].size = sizeof(inarg);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aaf2f9ff970e..ede4f77b2d6c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -446,6 +446,9 @@ struct fuse_conn {
 	/** Do multi-page cached writes */
 	unsigned big_writes:1;
 
+	/** Don't apply umask to creation modes */
+	unsigned dont_mask:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8673ccf90b7..6cc501bd0187 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -725,6 +725,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 			}
 			if (arg->flags & FUSE_BIG_WRITES)
 				fc->big_writes = 1;
+			if (arg->flags & FUSE_DONT_MASK)
+				fc->dont_mask = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
@@ -748,7 +750,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
+		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
@@ -864,6 +866,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto err_put_conn;
 
+	/* Handle umasking inside the fuse code */
+	if (sb->s_flags & MS_POSIXACL)
+		fc->dont_mask = 1;
+	sb->s_flags |= MS_POSIXACL;
+
 	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
 	fc->user_id = d.user_id;
-- 
cgit v1.2.3


From 3b463ae0c6264f70e5d4c0a9c46af20fed43c96e Mon Sep 17 00:00:00 2001
From: John Muir <muirj@nortel.com>
Date: Sun, 31 May 2009 11:13:57 -0400
Subject: fuse: invalidation reverse calls

Add notification messages that allow the filesystem to invalidate VFS
caches.

Two notifications are added:

 1) inode invalidation

   - invalidate cached attributes
   - invalidate a range of pages in the page cache (this is optional)

 2) dentry invalidation

   - try to invalidate a subtree in the dentry cache

Care must be taken while accessing the 'struct super_block' for the
mount, as it can go away while an invalidation is in progress.  To
prevent this, introduce a rw-semaphore, that is taken for read during
the invalidation and taken for write in the ->kill_sb callback.

Cc: Csaba Henk <csaba@gluster.com>
Cc: Anand Avati <avati@zresearch.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c    | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/dir.c    | 37 ++++++++++++++++++++++++++
 fs/fuse/fuse_i.h | 24 +++++++++++++++++
 fs/fuse/inode.c  | 59 ++++++++++++++++++++++++++++++++++++++---
 4 files changed, 198 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8a11a8c67c42..f58ecbc416c8 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -849,6 +849,81 @@ err:
 	return err;
 }
 
+static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_inode_out outarg;
+	int err = -EINVAL;
+
+	if (size != sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (!fc->sb)
+		goto err_unlock;
+
+	err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
+				       outarg.off, outarg.len);
+
+err_unlock:
+	up_read(&fc->killsb);
+	return err;
+
+err:
+	fuse_copy_finish(cs);
+	return err;
+}
+
+static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_entry_out outarg;
+	int err = -EINVAL;
+	char buf[FUSE_NAME_MAX+1];
+	struct qstr name;
+
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (!fc->sb)
+		goto err_unlock;
+
+	err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+
+err_unlock:
+	up_read(&fc->killsb);
+	return err;
+
+err:
+	fuse_copy_finish(cs);
+	return err;
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_POLL:
 		return fuse_notify_poll(fc, size, cs);
 
+	case FUSE_NOTIFY_INVAL_INODE:
+		return fuse_notify_inval_inode(fc, size, cs);
+
+	case FUSE_NOTIFY_INVAL_ENTRY:
+		return fuse_notify_inval_entry(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6b700734e519..e703654e7f40 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -859,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 	return err;
 }
 
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+			     struct qstr *name)
+{
+	int err = -ENOTDIR;
+	struct inode *parent;
+	struct dentry *dir;
+	struct dentry *entry;
+
+	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+	if (!parent)
+		return -ENOENT;
+
+	mutex_lock(&parent->i_mutex);
+	if (!S_ISDIR(parent->i_mode))
+		goto unlock;
+
+	err = -ENOENT;
+	dir = d_find_alias(parent);
+	if (!dir)
+		goto unlock;
+
+	entry = d_lookup(dir, name);
+	dput(dir);
+	if (!entry)
+		goto unlock;
+
+	fuse_invalidate_attr(parent);
+	fuse_invalidate_entry(entry);
+	dput(entry);
+	err = 0;
+
+ unlock:
+	mutex_unlock(&parent->i_mutex);
+	iput(parent);
+	return err;
+}
+
 /*
  * Calling into a user-controlled filesystem gives the filesystem
  * daemon ptrace-like capabilities over the requester process.  This
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ede4f77b2d6c..52b641fc0faf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -484,6 +484,12 @@ struct fuse_conn {
 
 	/** Called on final put */
 	void (*release)(struct fuse_conn *);
+
+	/** Super block for this connection. */
+	struct super_block *sb;
+
+	/** Read/write semaphore to hold when accessing sb. */
+	struct rw_semaphore killsb;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -511,6 +517,11 @@ extern const struct file_operations fuse_dev_operations;
 
 extern const struct dentry_operations fuse_dentry_operations;
 
+/**
+ * Inode to nodeid comparison.
+ */
+int fuse_inode_eq(struct inode *inode, void *_nodeidp);
+
 /**
  * Get a filled in inode
  */
@@ -711,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
 
+/**
+ * File-system tells the kernel to invalidate cache for the given node id.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+			     loff_t offset, loff_t len);
+
+/**
+ * File-system tells the kernel to invalidate parent attributes and
+ * the dentry matching parent/name.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+			     struct qstr *name);
+
 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 		 bool isdir);
 ssize_t fuse_direct_io(struct file *file, const char __user *buf,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6cc501bd0187..f91ccc4a189d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 		BUG();
 }
 
-static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+int fuse_inode_eq(struct inode *inode, void *_nodeidp)
 {
 	u64 nodeid = *(u64 *) _nodeidp;
 	if (get_node_id(inode) == nodeid)
@@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 	return inode;
 }
 
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+			     loff_t offset, loff_t len)
+{
+	struct inode *inode;
+	pgoff_t pg_start;
+	pgoff_t pg_end;
+
+	inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		return -ENOENT;
+
+	fuse_invalidate_attr(inode);
+	if (offset >= 0) {
+		pg_start = offset >> PAGE_CACHE_SHIFT;
+		if (len <= 0)
+			pg_end = -1;
+		else
+			pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+		invalidate_inode_pages2_range(inode->i_mapping,
+					      pg_start, pg_end);
+	}
+	iput(inode);
+	return 0;
+}
+
 static void fuse_umount_begin(struct super_block *sb)
 {
 	fuse_abort_conn(get_fuse_conn_super(sb));
@@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
+	init_rwsem(&fc->killsb);
 	atomic_set(&fc->count, 1);
 	init_waitqueue_head(&fc->waitq);
 	init_waitqueue_head(&fc->blocked_waitq);
@@ -862,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fuse_conn_init(fc);
 
 	fc->dev = sb->s_dev;
+	fc->sb = sb;
 	err = fuse_bdi_init(fc, sb);
 	if (err)
 		goto err_put_conn;
@@ -948,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	if (fc) {
+		down_write(&fc->killsb);
+		fc->sb = NULL;
+		up_write(&fc->killsb);
+	}
+
+	kill_anon_super(sb);
+}
+
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
 	.fs_flags	= FS_HAS_SUBTYPE,
 	.get_sb		= fuse_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= fuse_kill_sb_anon,
 };
 
 #ifdef CONFIG_BLOCK
@@ -965,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   mnt);
 }
 
+static void fuse_kill_sb_blk(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	if (fc) {
+		down_write(&fc->killsb);
+		fc->sb = NULL;
+		up_write(&fc->killsb);
+	}
+
+	kill_block_super(sb);
+}
+
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
 	.get_sb		= fuse_get_sb_blk,
-	.kill_sb	= kill_block_super,
+	.kill_sb	= fuse_kill_sb_blk,
 	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
 
-- 
cgit v1.2.3


From f7c2df9b55212d5ec94169a4de11e44c683e0af4 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Tue, 30 Jun 2009 21:10:13 +0100
Subject: AFS: Fix lock imbalance

Don't unlock on vfs_rejected_lock path in afs_do_setlk, since the lock
is unlocked after abort_attempt label.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/flock.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 210acafe4a9b..3ff8bdd18fb3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -432,7 +432,6 @@ vfs_rejected_lock:
 	list_del_init(&fl->fl_u.afs.link);
 	if (list_empty(&vnode->granted_locks))
 		afs_defer_unlock(vnode, key);
-	spin_unlock(&vnode->lock);
 	goto abort_attempt;
 }
 
-- 
cgit v1.2.3


From 133890103b9de08904f909995973e4b5c08a780e Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Tue, 30 Jun 2009 11:41:11 -0700
Subject: eventfd: revised interface and cleanups

Change the eventfd interface to de-couple the eventfd memory context, from
the file pointer instance.

Without such change, there is no clean way to racely free handle the
POLLHUP event sent when the last instance of the file* goes away.  Also,
now the internal eventfd APIs are using the eventfd context instead of the
file*.

This patch is required by KVM's IRQfd code, which is still under
development.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Gregory Haskins <ghaskins@novell.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c     |  24 ++++--------
 fs/eventfd.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 117 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 76da12537956..d065b2c3273e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	assert_spin_locked(&ctx->ctx_lock);
 
+	if (req->ki_eventfd != NULL)
+		eventfd_ctx_put(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
 		/* Complete the fput(s) */
 		if (req->ki_filp != NULL)
 			__fput(req->ki_filp);
-		if (req->ki_eventfd != NULL)
-			__fput(req->ki_eventfd);
 
 		/* Link the iocb into the context's free list */
 		spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-	int schedule_putreq = 0;
-
 	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
 		req, atomic_long_read(&req->ki_filp->f_count));
 
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	 * we would not be holding the last reference to the file*, so
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
-		schedule_putreq++;
-	else
-		req->ki_filp = NULL;
-	if (req->ki_eventfd != NULL) {
-		if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
-			schedule_putreq++;
-		else
-			req->ki_eventfd = NULL;
-	}
-	if (unlikely(schedule_putreq)) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
 		spin_unlock(&fput_lock);
 		queue_work(aio_wq, &fput_work);
-	} else
+	} else {
+		req->ki_filp = NULL;
 		really_put_req(ctx, req);
+	}
 	return 1;
 }
 
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 */
-		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e1974abdc..31d12de83a2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/anon_inodes.h>
-#include <linux/eventfd.h>
 #include <linux/syscalls.h>
 #include <linux/module.h>
+#include <linux/kref.h>
+#include <linux/eventfd.h>
 
 struct eventfd_ctx {
+	struct kref kref;
 	wait_queue_head_t wqh;
 	/*
 	 * Every time that a write(2) is performed on an eventfd, the
 	 * value of the __u64 being written is added to "count" and a
 	 * wakeup is performed on "wqh". A read(2) will return the "count"
 	 * value to userspace, and will reset "count" to zero. The kernel
-	 * size eventfd_signal() also, adds to the "count" counter and
+	 * side eventfd_signal() also, adds to the "count" counter and
 	 * issue a wakeup.
 	 */
 	__u64 count;
 	unsigned int flags;
 };
 
-/*
- * Adds "n" to the eventfd counter "count". Returns "n" in case of
- * success, or a value lower then "n" in case of coutner overflow.
- * This function is supposed to be called by the kernel in paths
- * that do not allow sleeping. In this function we allow the counter
- * to reach the ULLONG_MAX value, and we signal this as overflow
- * condition by returining a POLLERR to poll(2).
+/**
+ * eventfd_signal - Adds @n to the eventfd counter.
+ * @ctx: [in] Pointer to the eventfd context.
+ * @n: [in] Value of the counter to be added to the eventfd internal counter.
+ *          The value cannot be negative.
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as overflow condition by returining a POLLERR
+ * to poll(2).
+ *
+ * Returns @n in case of success, a non-negative number lower than @n in case
+ * of overflow, or the following error codes:
+ *
+ * -EINVAL    : The value of @n is negative.
  */
-int eventfd_signal(struct file *file, int n)
+int eventfd_signal(struct eventfd_ctx *ctx, int n)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	unsigned long flags;
 
 	if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
 }
 EXPORT_SYMBOL_GPL(eventfd_signal);
 
+static void eventfd_free(struct kref *kref)
+{
+	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
+
+	kfree(ctx);
+}
+
+/**
+ * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to the eventfd context.
+ *
+ * Returns: In case of success, returns a pointer to the eventfd context.
+ */
+struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
+{
+	kref_get(&ctx->kref);
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_get);
+
+/**
+ * eventfd_ctx_put - Releases a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to eventfd context.
+ *
+ * The eventfd context reference must have been previously acquired either
+ * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ */
+void eventfd_ctx_put(struct eventfd_ctx *ctx)
+{
+	kref_put(&ctx->kref, eventfd_free);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_put);
+
 static int eventfd_release(struct inode *inode, struct file *file)
 {
-	kfree(file->private_data);
+	struct eventfd_ctx *ctx = file->private_data;
+
+	wake_up_poll(&ctx->wqh, POLLHUP);
+	eventfd_ctx_put(ctx);
 	return 0;
 }
 
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
 	.write		= eventfd_write,
 };
 
+/**
+ * eventfd_fget - Acquire a reference of an eventfd file descriptor.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the eventfd file structure in case of success, or the
+ * following error pointer:
+ *
+ * -EBADF    : Invalid @fd file descriptor.
+ * -EINVAL   : The @fd file descriptor is not an eventfd file.
+ */
 struct file *eventfd_fget(int fd)
 {
 	struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
 }
 EXPORT_SYMBOL_GPL(eventfd_fget);
 
+/**
+ * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointers returned by the following functions:
+ *
+ * eventfd_fget
+ */
+struct eventfd_ctx *eventfd_ctx_fdget(int fd)
+{
+	struct file *file;
+	struct eventfd_ctx *ctx;
+
+	file = eventfd_fget(fd);
+	if (IS_ERR(file))
+		return (struct eventfd_ctx *) file;
+	ctx = eventfd_ctx_get(file->private_data);
+	fput(file);
+
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
+
+/**
+ * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
+ * @file: [in] Eventfd file pointer.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointer:
+ *
+ * -EINVAL   : The @fd file descriptor is not an eventfd file.
+ */
+struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
+{
+	if (file->f_op != &eventfd_fops)
+		return ERR_PTR(-EINVAL);
+
+	return eventfd_ctx_get(file->private_data);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
+
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
 	int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 	if (!ctx)
 		return -ENOMEM;
 
+	kref_init(&ctx->kref);
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
 	ctx->flags = flags;
-- 
cgit v1.2.3


From 341c87bf346f57748230628c5ad6ee69219250e8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 30 Jun 2009 11:41:23 -0700
Subject: elf: limit max map count to safe value

With ELF, at generating coredump, some more headers other than used
vmas are added.

When max_map_count == 65536, a core generated by following kinds of
code can be unreadable because the number of ELF's program header is
written in 16bit in Ehdr (please see elf.h) and the number overflows.

==
	... = mmap(); (munmap, mprotect, etc...)
	if (failed)
		abort();
==

This can happen in mmap/munmap/mprotect/etc...which calls split_vma().

I think 65536 is not safe as _default_ and reduce it to 65530 is good
for avoiding unexpected corrupted core.

Anyway, max_map_count can be enlarged by sysctl if a user is brave..

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Jakub Jelinek <jakub@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fa212b014a5..f1867900e459 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
 	if (!elf)
 		goto out;
-	
+	/*
+	 * The number of segs are recored into ELF header as 16bit value.
+	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
+	 */
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
 	segs += ELF_CORE_EXTRA_PHDRS;
-- 
cgit v1.2.3


From 4d6c13f87db12ae1ce35ea6a15688ac72419b133 Mon Sep 17 00:00:00 2001
From: Bryan Donlan <bdonlan@gmail.com>
Date: Tue, 30 Jun 2009 11:41:24 -0700
Subject: ext2: return -EIO not -ESTALE on directory traversal through deleted
 inode

ext2_iget() returns -ESTALE if invoked on a deleted inode, in order to
report errors to NFS properly.  However, in ext[234]_lookup(), this
-ESTALE can be propagated to userspace if the filesystem is corrupted such
that a directory entry references a deleted inode.  This leads to a
misleading error message - "Stale NFS file handle" - and confusion on the
part of the admin.

The bug can be easily reproduced by creating a new filesystem, making a
link to an unused inode using debugfs, then mounting and attempting to ls
-l said link.

This patch thus changes ext2_lookup to return -EIO if it receives -ESTALE
from ext2_iget(), as ext2 does for other filesystem metadata corruption;
and also invokes the appropriate ext*_error functions when this case is
detected.

Signed-off-by: Bryan Donlan <bdonlan@gmail.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext2/namei.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 6524ecaebb7a..e1dedb0f7873 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
 	inode = NULL;
 	if (ino) {
 		inode = ext2_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
+		if (unlikely(IS_ERR(inode))) {
+			if (PTR_ERR(inode) == -ESTALE) {
+				ext2_error(dir->i_sb, __func__,
+						"deleted inode referenced: %lu",
+						ino);
+				return ERR_PTR(-EIO);
+			} else {
+				return ERR_CAST(inode);
+			}
+		}
 	}
 	return d_splice_alias(inode, dentry);
 }
-- 
cgit v1.2.3


From 752fa51e4c5182c3c257f1cede90577a7e213c58 Mon Sep 17 00:00:00 2001
From: Wolfgang Illmeyer <wolfgang@illmeyer.com>
Date: Tue, 30 Jun 2009 11:41:44 -0700
Subject: hostfs: set maximum filesize in superblock for proper LFS support

Maximum file size for hostfs mounts defaults to 2GB, so bigger files cannot be
read/written through hostfs. This patch initializes the maximum file size to
MAX_LFS_SIZE.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13531

Signed-off-by: Wolfgang Illmeyer <wolfgang@illmeyer.com>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hostfs/hostfs_kern.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fe02ad4740e7..032604e5ef2c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = HOSTFS_SUPER_MAGIC;
 	sb->s_op = &hostfs_sbops;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	/* NULL is printed as <NULL> by sprintf: avoid that. */
 	if (req_root == NULL)
-- 
cgit v1.2.3


From 7878cba9f0037f5599004b03a1260b32d9050360 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Jun 2009 15:37:49 +0200
Subject: block: Create bip slabs with embedded integrity vectors

This patch restores stacking ability to the block layer integrity
infrastructure by creating a set of dedicated bip slabs.  Each bip slab
has an embedded bio_vec array at the end.  This cuts down on memory
allocations and also simplifies the code compared to the original bvec
version.  Only the largest bip slab is backed by a mempool.  The pool is
contained in the bio_set so stacking drivers can ensure forward
progress.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@carl.(none)>
---
 fs/bio-integrity.c | 170 +++++++++++++++++++++++++++++++++++++++--------------
 fs/bio.c           |  11 +++-
 2 files changed, 133 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31c46a241bac..49a34e7f7306 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -1,7 +1,7 @@
 /*
  * bio-integrity.c - bio data integrity extensions
  *
- * Copyright (C) 2007, 2008 Oracle Corporation
+ * Copyright (C) 2007, 2008, 2009 Oracle Corporation
  * Written by: Martin K. Petersen <martin.petersen@oracle.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -25,63 +25,121 @@
 #include <linux/bio.h>
 #include <linux/workqueue.h>
 
-static struct kmem_cache *bio_integrity_slab __read_mostly;
-static mempool_t *bio_integrity_pool;
-static struct bio_set *integrity_bio_set;
+struct integrity_slab {
+	struct kmem_cache *slab;
+	unsigned short nr_vecs;
+	char name[8];
+};
+
+#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
+struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
+	IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
+};
+#undef IS
+
 static struct workqueue_struct *kintegrityd_wq;
 
+static inline unsigned int vecs_to_idx(unsigned int nr)
+{
+	switch (nr) {
+	case 1:
+		return 0;
+	case 2 ... 4:
+		return 1;
+	case 5 ... 16:
+		return 2;
+	case 17 ... 64:
+		return 3;
+	case 65 ... 128:
+		return 4;
+	case 129 ... BIO_MAX_PAGES:
+		return 5;
+	default:
+		BUG();
+	}
+}
+
+static inline int use_bip_pool(unsigned int idx)
+{
+	if (idx == BIOVEC_NR_POOLS)
+		return 1;
+
+	return 0;
+}
+
 /**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
  * @bio:	bio to attach integrity metadata to
  * @gfp_mask:	Memory allocation mask
  * @nr_vecs:	Number of integrity metadata scatter-gather elements
+ * @bs:		bio_set to allocate from
  *
  * Description: This function prepares a bio for attaching integrity
  * metadata.  nr_vecs specifies the maximum number of pages containing
  * integrity metadata that can be attached.
  */
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
-						  gfp_t gfp_mask,
-						  unsigned int nr_vecs)
+struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
+							 gfp_t gfp_mask,
+							 unsigned int nr_vecs,
+							 struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip;
-	struct bio_vec *iv;
-	unsigned long idx;
+	unsigned int idx = vecs_to_idx(nr_vecs);
 
 	BUG_ON(bio == NULL);
+	bip = NULL;
 
-	bip = mempool_alloc(bio_integrity_pool, gfp_mask);
-	if (unlikely(bip == NULL)) {
-		printk(KERN_ERR "%s: could not alloc bip\n", __func__);
-		return NULL;
-	}
+	/* Lower order allocations come straight from slab */
+	if (!use_bip_pool(idx))
+		bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
 
-	memset(bip, 0, sizeof(*bip));
+	/* Use mempool if lower order alloc failed or max vecs were requested */
+	if (bip == NULL) {
+		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
 
-	iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
-	if (unlikely(iv == NULL)) {
-		printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
-		mempool_free(bip, bio_integrity_pool);
-		return NULL;
+		if (unlikely(bip == NULL)) {
+			printk(KERN_ERR "%s: could not alloc bip\n", __func__);
+			return NULL;
+		}
 	}
 
-	bip->bip_pool = idx;
-	bip->bip_vec = iv;
+	memset(bip, 0, sizeof(*bip));
+
+	bip->bip_slab = idx;
 	bip->bip_bio = bio;
 	bio->bi_integrity = bip;
 
 	return bip;
 }
+EXPORT_SYMBOL(bio_integrity_alloc_bioset);
+
+/**
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * @bio:	bio to attach integrity metadata to
+ * @gfp_mask:	Memory allocation mask
+ * @nr_vecs:	Number of integrity metadata scatter-gather elements
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata.  nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+						  gfp_t gfp_mask,
+						  unsigned int nr_vecs)
+{
+	return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
+}
 EXPORT_SYMBOL(bio_integrity_alloc);
 
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
+ * @bs:		bio_set this bio was allocated from
  *
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 
@@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
-	mempool_free(bip, bio_integrity_pool);
+	if (use_bip_pool(bip->bip_slab))
+		mempool_free(bip, bs->bio_integrity_pool);
+	else
+		kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
 
 	bio->bi_integrity = NULL;
 }
@@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 	struct bio_vec *iv;
 
-	if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
+	if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
 		printk(KERN_ERR "%s: bip_vec full\n", __func__);
 		return 0;
 	}
@@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
 	bp->iv1 = bip->bip_vec[0];
 	bp->iv2 = bip->bip_vec[0];
 
-	bp->bip1.bip_vec = &bp->iv1;
-	bp->bip2.bip_vec = &bp->iv2;
+	bp->bip1.bip_vec[0] = bp->iv1;
+	bp->bip2.bip_vec[0] = bp->iv2;
 
 	bp->iv1.bv_len = sectors * bi->tuple_size;
 	bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
  * @bio:	New bio
  * @bio_src:	Original bio
  * @gfp_mask:	Memory allocation mask
+ * @bs:		bio_set to allocate bip from
  *
  * Description:	Called to allocate a bip when cloning a bio
  */
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+			gfp_t gfp_mask, struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
 	struct bio_integrity_payload *bip;
 
 	BUG_ON(bip_src == NULL);
 
-	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+	bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
 
 	if (bip == NULL)
 		return -EIO;
@@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(bio_integrity_clone);
 
-static int __init bio_integrity_init(void)
+int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
-	kintegrityd_wq = create_workqueue("kintegrityd");
+	unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
+
+	bs->bio_integrity_pool =
+		mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
 
+	if (!bs->bio_integrity_pool)
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL(bioset_integrity_create);
+
+void bioset_integrity_free(struct bio_set *bs)
+{
+	if (bs->bio_integrity_pool)
+		mempool_destroy(bs->bio_integrity_pool);
+}
+EXPORT_SYMBOL(bioset_integrity_free);
+
+void __init bio_integrity_init(void)
+{
+	unsigned int i;
+
+	kintegrityd_wq = create_workqueue("kintegrityd");
 	if (!kintegrityd_wq)
 		panic("Failed to create kintegrityd\n");
 
-	bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
-					SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
+		unsigned int size;
 
-	bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
-						      bio_integrity_slab);
-	if (!bio_integrity_pool)
-		panic("bio_integrity: can't allocate bip pool\n");
+		size = sizeof(struct bio_integrity_payload)
+			+ bip_slab[i].nr_vecs * sizeof(struct bio_vec);
 
-	integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
-	if (!integrity_bio_set)
-		panic("bio_integrity: can't allocate bio_set\n");
-
-	return 0;
+		bip_slab[i].slab =
+			kmem_cache_create(bip_slab[i].name, size, 0,
+					  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+	}
 }
-subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 24c914043532..1486b19fc431 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio);
+		bio_integrity_free(bio, bs);
 
 	/*
 	 * If we have front padding, adjust the bio pointer before freeing
@@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
 static void bio_kmalloc_destructor(struct bio *bio)
 {
 	if (bio_integrity(bio))
-		bio_integrity_free(bio);
+		bio_integrity_free(bio, fs_bio_set);
 	kfree(bio);
 }
 
@@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 	if (bio_integrity(bio)) {
 		int ret;
 
-		ret = bio_integrity_clone(b, bio, gfp_mask);
+		ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
 
 		if (ret < 0) {
 			bio_put(b);
@@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs)
 	if (bs->bio_pool)
 		mempool_destroy(bs->bio_pool);
 
+	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
 	bio_put_slab(bs);
 
@@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
+	if (bioset_integrity_create(bs, pool_size))
+		goto bad;
+
 	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
@@ -1616,6 +1620,7 @@ static int __init init_bio(void)
 	if (!bio_slabs)
 		panic("bio: can't allocate bios\n");
 
+	bio_integrity_init();
 	biovec_init_slabs();
 
 	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
-- 
cgit v1.2.3


From e2dbe12557d85d81f4527879499f55681c3cca4f Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 1 Jul 2009 01:06:26 -0400
Subject: elf: fix one check-after-use

Check before use it.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f1867900e459..b7c1603cd4bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	info->thread = NULL;
 
 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
-	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
-
 	if (psinfo == NULL)
 		return 0;
 
+	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
 	/*
 	 * Figure out how many notes we're going to need for each thread.
 	 */
-- 
cgit v1.2.3


From cc0bad7552308e8905d6ea56e6b7811fa67e716d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 25 Jun 2009 00:56:52 -0400
Subject: cifs: add new cifs_iget function and convert unix codepath to use it

cifs: add new cifs_iget function and convert unix codepath to use it

In order to unify some codepaths, introduce a common cifs_fattr struct
for storing inode attributes. The different codepaths (unix, legacy,
normal, etc...) can fill out this struct with inode info. It can then be
passed as an arg to a common set of routines to get and update inodes.

Add a new cifs_iget function that uses iget5_locked to identify inodes.
This will compare inodes based on the uniqueid value in a cifs_fattr
struct.

Rather than filling out an already-created inode, have
cifs_get_inode_info_unix instead fill out cifs_fattr and hand that off
to cifs_iget. cifs_iget can then properly look for hardlinked inodes.

On the readdir side, add a new cifs_readdir_lookup function that spawns
populated dentries. Redefine FILE_UNIX_INFO so that it's basically a
FILE_UNIX_BASIC_INFO that has a few fields wrapped around it. This
allows us to more easily use the same function for filling out the fattr
as the non-readdir codepath.

With this, we should then have proper hardlink detection and can
eventually get rid of some nasty CIFS-specific hacks for handing them.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.h    |  13 ++
 fs/cifs/cifsglob.h  |  25 ++++
 fs/cifs/cifspdu.h   |  14 +-
 fs/cifs/cifsproto.h |   9 +-
 fs/cifs/dir.c       |  22 +--
 fs/cifs/inode.c     | 380 ++++++++++++++++++++++++++--------------------------
 fs/cifs/readdir.c   | 253 +++++++++++++---------------------
 7 files changed, 339 insertions(+), 377 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9570a0e8023f..586df24c9abb 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,6 +24,19 @@
 
 #define ROOT_I 2
 
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static inline ino_t
+cifs_uniqueid_to_ino_t(u64 fileid)
+{
+	ino_t ino = (ino_t) fileid;
+	if (sizeof(ino_t) < sizeof(u64))
+		ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
+	return ino;
+}
+
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
 extern const struct address_space_operations cifs_addr_ops_smallbuf;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e1225e6ded2f..e6435cba8113 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -371,6 +371,7 @@ struct cifsInodeInfo {
 	bool oplockPending:1;
 	bool delete_pending:1;		/* DELETE_ON_CLOSE is set */
 	u64  server_eof;		/* current file size on server */
+	u64  uniqueid;			/* server inode number */
 	struct inode vfs_inode;
 };
 
@@ -472,6 +473,30 @@ struct dfs_info3_param {
 	char *node_name;
 };
 
+/*
+ * common struct for holding inode info when searching for or updating an
+ * inode with new info
+ */
+
+#define CIFS_FATTR_DFS_REFERRAL		0x1
+
+struct cifs_fattr {
+	u32		cf_flags;
+	u32		cf_cifsattrs;
+	u64		cf_uniqueid;
+	u64		cf_eof;
+	u64		cf_bytes;
+	uid_t		cf_uid;
+	gid_t		cf_gid;
+	umode_t		cf_mode;
+	dev_t		cf_rdev;
+	unsigned int	cf_nlink;
+	unsigned int	cf_dtype;
+	struct timespec	cf_atime;
+	struct timespec	cf_mtime;
+	struct timespec	cf_ctime;
+};
+
 static inline void free_dfs_info_param(struct dfs_info3_param *param)
 {
 	if (param) {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index a785f69dbc9f..2d07f890a842 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2328,19 +2328,7 @@ struct file_attrib_tag {
 typedef struct {
 	__le32 NextEntryOffset;
 	__u32 ResumeKey; /* as with FileIndex - no need to convert */
-	__le64 EndOfFile;
-	__le64 NumOfBytes;
-	__le64 LastStatusChange; /*SNIA specs DCE time for the 3 time fields */
-	__le64 LastAccessTime;
-	__le64 LastModificationTime;
-	__le64 Uid;
-	__le64 Gid;
-	__le32 Type;
-	__le64 DevMajor;
-	__le64 DevMinor;
-	__le64 UniqueId;
-	__le64 Permissions;
-	__le64 Nlinks;
+	FILE_UNIX_BASIC_INFO basic;
 	char FileName[1];
 } __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c419416a42ee..b2bd83fd2aa4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -98,9 +98,14 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
 			   struct super_block *sb, int mode, int oflags,
 			   int *poplock, __u16 *pnetfid, int xid);
-extern void posix_fill_in_inode(struct inode *tmp_inode,
-				FILE_UNIX_BASIC_INFO *pData, int isNewInode);
+extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
+				     FILE_UNIX_BASIC_INFO *info,
+				     struct cifs_sb_info *cifs_sb);
+extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
 extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
+extern struct inode *cifs_iget(struct super_block *sb,
+			       struct cifs_fattr *fattr);
+
 extern int cifs_get_inode_info(struct inode **pinode,
 			const unsigned char *search_path,
 			FILE_ALL_INFO *pfile_info,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7dc6b74f9def..a40054faed7f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -188,6 +188,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	FILE_UNIX_BASIC_INFO *presp_data;
 	__u32 posix_flags = 0;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+	struct cifs_fattr fattr;
 
 	cFYI(1, ("posix open %s", full_path));
 
@@ -236,22 +237,21 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	if (presp_data->Type == cpu_to_le32(-1))
 		goto posix_open_ret; /* open ok, caller does qpathinfo */
 
-	/* get new inode and set it up */
 	if (!pinode)
 		goto posix_open_ret; /* caller does not need info */
 
+	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
+
+	/* get new inode and set it up */
 	if (*pinode == NULL) {
-		__u64 unique_id = le64_to_cpu(presp_data->UniqueId);
-		*pinode = cifs_new_inode(sb, &unique_id);
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode) {
+			rc = -ENOMEM;
+			goto posix_open_ret;
+		}
+	} else {
+		cifs_fattr_to_inode(*pinode, &fattr);
 	}
-	/* else an inode was passed in. Update its info, don't create one */
-
-	/* We do not need to close the file if new_inode fails since
-	   the caller will retry qpathinfo as long as inode is null */
-	if (*pinode == NULL)
-		goto posix_open_ret;
-
-	posix_fill_in_inode(*pinode, presp_data, 1);
 
 	cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 155c9e785d0c..b22379610d71 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -77,127 +77,146 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
 	}
 }
 
-static void cifs_unix_info_to_inode(struct inode *inode,
-		FILE_UNIX_BASIC_INFO *info, int force_uid_gid)
+/* populate an inode with info from a cifs_fattr struct */
+void
+cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 {
-	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
-	__u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
-	__u64 end_of_file = le64_to_cpu(info->EndOfFile);
+	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+	unsigned long now = jiffies;
+
+	inode->i_atime = fattr->cf_atime;
+	inode->i_mtime = fattr->cf_mtime;
+	inode->i_ctime = fattr->cf_ctime;
+	inode->i_mode = fattr->cf_mode;
+	inode->i_rdev = fattr->cf_rdev;
+	inode->i_nlink = fattr->cf_nlink;
+	inode->i_uid = fattr->cf_uid;
+	inode->i_gid = fattr->cf_gid;
+
+	cifs_i->cifsAttrs = fattr->cf_cifsattrs;
+	cifs_i->uniqueid = fattr->cf_uniqueid;
+
+	cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode,
+		 cifs_i->time, now));
+	cifs_i->time = now;
+
+	/*
+	 * Can't safely change the file size here if the client is writing to
+	 * it due to potential races.
+	 */
+	spin_lock(&inode->i_lock);
+	if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
+		i_size_write(inode, fattr->cf_eof);
+
+		/*
+		 * i_blocks is not related to (i_size / i_blksize),
+		 * but instead 512 byte (2**9) size is required for
+		 * calculating num blocks.
+		 */
+		inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9;
+	}
+	spin_unlock(&inode->i_lock);
+
+	cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
+}
+
+/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
+void
+cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
+			 struct cifs_sb_info *cifs_sb)
+{
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_uniqueid = le64_to_cpu(info->UniqueId);
+	fattr->cf_bytes = le64_to_cpu(info->NumOfBytes);
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
 
-	inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime);
-	inode->i_mtime =
-		cifs_NTtimeToUnix(info->LastModificationTime);
-	inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
-	inode->i_mode = le64_to_cpu(info->Permissions);
+	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+	fattr->cf_mode = le64_to_cpu(info->Permissions);
 
 	/*
 	 * Since we set the inode type below we need to mask off
 	 * to avoid strange results if bits set above.
 	 */
-	inode->i_mode &= ~S_IFMT;
+	fattr->cf_mode &= ~S_IFMT;
 	switch (le32_to_cpu(info->Type)) {
 	case UNIX_FILE:
-		inode->i_mode |= S_IFREG;
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		break;
 	case UNIX_SYMLINK:
-		inode->i_mode |= S_IFLNK;
+		fattr->cf_mode |= S_IFLNK;
+		fattr->cf_dtype = DT_LNK;
 		break;
 	case UNIX_DIR:
-		inode->i_mode |= S_IFDIR;
+		fattr->cf_mode |= S_IFDIR;
+		fattr->cf_dtype = DT_DIR;
 		break;
 	case UNIX_CHARDEV:
-		inode->i_mode |= S_IFCHR;
-		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
-				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		fattr->cf_mode |= S_IFCHR;
+		fattr->cf_dtype = DT_CHR;
+		fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				       le64_to_cpu(info->DevMinor) & MINORMASK);
 		break;
 	case UNIX_BLOCKDEV:
-		inode->i_mode |= S_IFBLK;
-		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
-				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		fattr->cf_mode |= S_IFBLK;
+		fattr->cf_dtype = DT_BLK;
+		fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				       le64_to_cpu(info->DevMinor) & MINORMASK);
 		break;
 	case UNIX_FIFO:
-		inode->i_mode |= S_IFIFO;
+		fattr->cf_mode |= S_IFIFO;
+		fattr->cf_dtype = DT_FIFO;
 		break;
 	case UNIX_SOCKET:
-		inode->i_mode |= S_IFSOCK;
+		fattr->cf_mode |= S_IFSOCK;
+		fattr->cf_dtype = DT_SOCK;
 		break;
 	default:
 		/* safest to call it a file if we do not know */
-		inode->i_mode |= S_IFREG;
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
 		break;
 	}
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) &&
-	    !force_uid_gid)
-		inode->i_uid = cifs_sb->mnt_uid;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+		fattr->cf_uid = cifs_sb->mnt_uid;
 	else
-		inode->i_uid = le64_to_cpu(info->Uid);
+		fattr->cf_uid = le64_to_cpu(info->Uid);
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) &&
-	    !force_uid_gid)
-		inode->i_gid = cifs_sb->mnt_gid;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+		fattr->cf_gid = cifs_sb->mnt_gid;
 	else
-		inode->i_gid = le64_to_cpu(info->Gid);
+		fattr->cf_gid = le64_to_cpu(info->Gid);
 
-	inode->i_nlink = le64_to_cpu(info->Nlinks);
-
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/*
-		 * We can not safely change the file size here if the client
-		 * is writing to it due to potential races.
-		 */
-		i_size_write(inode, end_of_file);
-
-		/*
-		 * i_blocks is not related to (i_size / i_blksize),
-		 * but instead 512 byte (2**9) size is required for
-		 * calculating num blocks.
-		 */
-		inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-	}
-	spin_unlock(&inode->i_lock);
+	fattr->cf_nlink = le64_to_cpu(info->Nlinks);
 }
 
-
 /*
- *	Needed to setup inode data for the directory which is the
- *	junction to the new submount (ie to setup the fake directory
- *      which represents a DFS referral)
+ * Fill a cifs_fattr struct with fake inode info.
+ *
+ * Needed to setup cifs_fattr data for the directory which is the
+ * junction to the new submount (ie to setup the fake directory
+ * which represents a DFS referral).
  */
-static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat,
-			       struct super_block *sb)
+void
+cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 {
-	struct inode *pinode = NULL;
-
-	memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO));
+	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
-/*	__le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
-	__le64 pfnd_dat->NumOfBytes = cpu_to_le64(0);
-	__u64 UniqueId = 0;  */
-	pfnd_dat->LastStatusChange =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastAccessTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastModificationTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->Type = cpu_to_le32(UNIX_DIR);
-	pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU);
-	pfnd_dat->Nlinks = cpu_to_le64(2);
-	if (sb->s_root)
-		pinode = sb->s_root->d_inode;
-	if (pinode == NULL)
-		return;
-
-	/* fill in default values for the remaining based on root
-	   inode since we can not query the server for this inode info */
-	pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev));
-	pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev));
-	pfnd_dat->Uid = cpu_to_le64(pinode->i_uid);
-	pfnd_dat->Gid = cpu_to_le64(pinode->i_gid);
+	cFYI(1, ("creating fake fattr for DFS referral"));
+
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_atime = CURRENT_TIME;
+	fattr->cf_ctime = CURRENT_TIME;
+	fattr->cf_mtime = CURRENT_TIME;
+	fattr->cf_nlink = 2;
+	fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
 }
 
 /**
@@ -244,66 +263,42 @@ cifs_new_inode(struct super_block *sb, __u64 *inum)
 }
 
 int cifs_get_inode_info_unix(struct inode **pinode,
-	const unsigned char *full_path, struct super_block *sb, int xid)
+			     const unsigned char *full_path,
+			     struct super_block *sb, int xid)
 {
-	int rc = 0;
+	int rc;
 	FILE_UNIX_BASIC_INFO find_data;
-	struct cifsTconInfo *pTcon;
-	struct inode *inode;
+	struct cifs_fattr fattr;
+	struct cifsTconInfo *tcon;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-	bool is_dfs_referral = false;
-	struct cifsInodeInfo *cifsInfo;
-	__u64 num_of_bytes;
-	__u64 end_of_file;
 
-	pTcon = cifs_sb->tcon;
+	tcon = cifs_sb->tcon;
 	cFYI(1, ("Getting info on %s", full_path));
 
 	/* could have done a find first instead but this returns more info */
-	rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data,
+	rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
 				  cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc == -EREMOTE && !is_dfs_referral) {
-		is_dfs_referral = true;
-		cFYI(DBG2, ("DFS ref"));
-		/* for DFS, server does not give us real inode data */
-		fill_fake_finddataunix(&find_data, sb);
-		rc = 0;
-	} else if (rc)
-		goto cgiiu_exit;
 
-	num_of_bytes = le64_to_cpu(find_data.NumOfBytes);
-	end_of_file = le64_to_cpu(find_data.EndOfFile);
+	if (!rc) {
+		cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
+	} else if (rc == -EREMOTE) {
+		cifs_create_dfs_fattr(&fattr, sb);
+		rc = 0;
+	} else {
+		return rc;
+	}
 
-	/* get new inode */
 	if (*pinode == NULL) {
-		__u64 unique_id = le64_to_cpu(find_data.UniqueId);
-		*pinode = cifs_new_inode(sb, &unique_id);
-		if (*pinode == NULL) {
+		/* get new inode */
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode)
 			rc = -ENOMEM;
-			goto cgiiu_exit;
-		}
+	} else {
+		/* we already have inode, update it */
+		cifs_fattr_to_inode(*pinode, &fattr);
 	}
 
-	inode = *pinode;
-	cifsInfo = CIFS_I(inode);
-
-	cFYI(1, ("Old time %ld", cifsInfo->time));
-	cifsInfo->time = jiffies;
-	cFYI(1, ("New time %ld", cifsInfo->time));
-	/* this is ok to set on every inode revalidate */
-	atomic_set(&cifsInfo->inUse, 1);
-
-	cifs_unix_info_to_inode(inode, &find_data, 0);
-
-	if (num_of_bytes < end_of_file)
-		cFYI(1, ("allocation size less than end of file"));
-	cFYI(1, ("Size %ld and blocks %llu",
-		(unsigned long) inode->i_size,
-		(unsigned long long)inode->i_blocks));
-
-	cifs_set_ops(inode, is_dfs_referral);
-cgiiu_exit:
 	return rc;
 }
 
@@ -695,33 +690,85 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
 	return full_path;
 }
 
+static int
+cifs_find_inode(struct inode *inode, void *opaque)
+{
+	struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+
+	if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
+		return 0;
+
+	return 1;
+}
+
+static int
+cifs_init_inode(struct inode *inode, void *opaque)
+{
+	struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+
+	CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
+	return 0;
+}
+
+/* Given fattrs, get a corresponding inode */
+struct inode *
+cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
+{
+	unsigned long hash;
+	struct inode *inode;
+
+	cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
+
+	/* hash down to 32-bits on 32-bit arch */
+	hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
+
+	inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
+
+	/* we have fattrs in hand, update the inode */
+	if (inode) {
+		cifs_fattr_to_inode(inode, fattr);
+		if (sb->s_flags & MS_NOATIME)
+			inode->i_flags |= S_NOATIME | S_NOCMTIME;
+		if (inode->i_state & I_NEW) {
+			inode->i_ino = hash;
+			unlock_new_inode(inode);
+		}
+	}
+
+	return inode;
+}
+
 /* gets root inode */
 struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 {
 	int xid;
 	struct cifs_sb_info *cifs_sb;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	long rc;
 	char *full_path;
 
-	inode = iget_locked(sb, ino);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-	if (!(inode->i_state & I_NEW))
-		return inode;
-
-	cifs_sb = CIFS_SB(inode->i_sb);
+	cifs_sb = CIFS_SB(sb);
 	full_path = cifs_build_path_to_root(cifs_sb);
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	xid = GetXid();
-	if (cifs_sb->tcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
-						xid);
-	else
+	if (cifs_sb->tcon->unix_ext) {
+		rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+	} else {
+		inode = iget_locked(sb, ino);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+		if (!(inode->i_state & I_NEW))
+			return inode;
+
 		rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb,
 						xid, NULL);
+		unlock_new_inode(inode);
+	}
+
 	if (rc && cifs_sb->tcon->ipc) {
 		cFYI(1, ("ipc connection - fake read inode"));
 		inode->i_mode |= S_IFDIR;
@@ -737,7 +784,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 		return ERR_PTR(rc);
 	}
 
-	unlock_new_inode(inode);
 
 	kfree(full_path);
 	/* can not call macro FreeXid here since in a void func
@@ -1063,44 +1109,6 @@ out_reval:
 	return rc;
 }
 
-void posix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_BASIC_INFO *pData, int isNewInode)
-{
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	cifsInfo->time = jiffies;
-	atomic_inc(&cifsInfo->inUse);
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	cifs_unix_info_to_inode(tmp_inode, pData, 1);
-	cifs_set_ops(tmp_inode, false);
-
-	if (!S_ISREG(tmp_inode->i_mode))
-		return;
-
-	/*
-	 * No sense invalidating pages for new inode
-	 * since we we have not started caching
-	 * readahead file data yet.
-	 */
-	if (isNewInode)
-		return;
-
-	if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-		(local_size == tmp_inode->i_size)) {
-		cFYI(1, ("inode exists but unchanged"));
-	} else {
-		/* file may have changed on server */
-		cFYI(1, ("invalidate inode, readdir detected change"));
-		invalidate_remote_inode(tmp_inode);
-	}
-}
-
 int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 {
 	int rc = 0, tmprc;
@@ -1109,6 +1117,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	struct cifsTconInfo *pTcon;
 	char *full_path = NULL;
 	struct inode *newinode = NULL;
+	struct cifs_fattr fattr;
 
 	cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode));
 
@@ -1148,7 +1157,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			cFYI(1, ("posix mkdir returned 0x%x", rc));
 			d_drop(direntry);
 		} else {
-			__u64 unique_id;
 			if (pInfo->Type == cpu_to_le32(-1)) {
 				/* no return info, go query for it */
 				kfree(pInfo);
@@ -1162,20 +1170,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			else
 				direntry->d_op = &cifs_dentry_ops;
 
-			unique_id = le64_to_cpu(pInfo->UniqueId);
-			newinode = cifs_new_inode(inode->i_sb, &unique_id);
-			if (newinode == NULL) {
+			cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
+			newinode = cifs_iget(inode->i_sb, &fattr);
+			if (!newinode) {
 				kfree(pInfo);
 				goto mkdir_get_info;
 			}
 
-			newinode->i_nlink = 2;
 			d_instantiate(direntry, newinode);
 
-			/* we already checked in POSIXCreate whether
-			   frame was long enough */
-			posix_fill_in_inode(direntry->d_inode,
-					pInfo, 1 /* NewInode */);
 #ifdef CONFIG_CIFS_DEBUG2
 			cFYI(1, ("instantiated dentry %p %s to inode %p",
 				direntry, direntry->d_name.name, newinode));
@@ -1622,6 +1625,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	if (!err) {
 		generic_fillattr(dentry->d_inode, stat);
 		stat->blksize = CIFS_MAX_MSGSIZE;
+		stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
 	}
 	return err;
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 86d0055dc529..231aa6953f83 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -63,6 +63,55 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
 }
 #endif /* DEBUG2 */
 
+/*
+ * Find the dentry that matches "name". If there isn't one, create one. If it's
+ * a negative dentry or the uniqueid changed, then drop it and recreate it.
+ */
+static struct dentry *
+cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
+		    struct cifs_fattr *fattr)
+{
+	struct dentry *dentry, *alias;
+	struct inode *inode;
+	struct super_block *sb = parent->d_inode->i_sb;
+
+	cFYI(1, ("For %s", name->name));
+
+	dentry = d_lookup(parent, name);
+	if (dentry) {
+		/* FIXME: check for inode number changes? */
+		if (dentry->d_inode != NULL)
+			return dentry;
+		d_drop(dentry);
+		dput(dentry);
+	}
+
+	dentry = d_alloc(parent, name);
+	if (dentry == NULL)
+		return NULL;
+
+	inode = cifs_iget(sb, fattr);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	if (CIFS_SB(sb)->tcon->nocase)
+		dentry->d_op = &cifs_ci_dentry_ops;
+	else
+		dentry->d_op = &cifs_dentry_ops;
+
+	alias = d_materialise_unique(dentry, inode);
+	if (alias != NULL) {
+		dput(dentry);
+		if (IS_ERR(alias))
+			return NULL;
+		dentry = alias;
+	}
+
+	return dentry;
+}
+
 /* Returns 1 if new inode created, 2 if both dentry and inode were */
 /* Might check in the future if inode number changed so we can rehash inode */
 static int
@@ -76,7 +125,6 @@ construct_dentry(struct qstr *qstring, struct file *file,
 
 	cFYI(1, ("For %s", qstring->name));
 
-	qstring->hash = full_name_hash(qstring->name, qstring->len);
 	tmp_dentry = d_lookup(file->f_path.dentry, qstring);
 	if (tmp_dentry) {
 		/* BB: overwrite old name? i.e. tmp_dentry->d_name and
@@ -299,140 +347,6 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 	}
 }
 
-static void unix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
-{
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-
-	__u32 type = le32_to_cpu(pfindData->Type);
-	__u64 num_of_bytes = le64_to_cpu(pfindData->NumOfBytes);
-	__u64 end_of_file = le64_to_cpu(pfindData->EndOfFile);
-	cifsInfo->time = jiffies;
-	atomic_inc(&cifsInfo->inUse);
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(pfindData->LastAccessTime);
-	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(pfindData->LastModificationTime);
-	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(pfindData->LastStatusChange);
-
-	tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
-	/* since we set the inode type below we need to mask off type
-	   to avoid strange results if bits above were corrupt */
-	tmp_inode->i_mode &= ~S_IFMT;
-	if (type == UNIX_FILE) {
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-	} else if (type == UNIX_SYMLINK) {
-		*pobject_type = DT_LNK;
-		tmp_inode->i_mode |= S_IFLNK;
-	} else if (type == UNIX_DIR) {
-		*pobject_type = DT_DIR;
-		tmp_inode->i_mode |= S_IFDIR;
-	} else if (type == UNIX_CHARDEV) {
-		*pobject_type = DT_CHR;
-		tmp_inode->i_mode |= S_IFCHR;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
-				le64_to_cpu(pfindData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_BLOCKDEV) {
-		*pobject_type = DT_BLK;
-		tmp_inode->i_mode |= S_IFBLK;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
-				le64_to_cpu(pfindData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_FIFO) {
-		*pobject_type = DT_FIFO;
-		tmp_inode->i_mode |= S_IFIFO;
-	} else if (type == UNIX_SOCKET) {
-		*pobject_type = DT_SOCK;
-		tmp_inode->i_mode |= S_IFSOCK;
-	} else {
-		/* safest to just call it a file */
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-		cFYI(1, ("unknown inode type %d", type));
-	}
-
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
-		tmp_inode->i_uid = cifs_sb->mnt_uid;
-	else
-		tmp_inode->i_uid = le64_to_cpu(pfindData->Uid);
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
-		tmp_inode->i_gid = cifs_sb->mnt_gid;
-	else
-		tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
-	tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
-
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&tmp_inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		client is writing to it due to potential races */
-		i_size_write(tmp_inode, end_of_file);
-
-	/* 512 bytes (2**9) is the fake blocksize that must be used */
-	/* for this calculation, not the real blocksize */
-		tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-	}
-	spin_unlock(&tmp_inode->i_lock);
-
-	if (S_ISREG(tmp_inode->i_mode)) {
-		cFYI(1, ("File inode"));
-		tmp_inode->i_op = &cifs_file_inode_ops;
-
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
-			else
-				tmp_inode->i_fop = &cifs_file_direct_ops;
-		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			tmp_inode->i_fop = &cifs_file_nobrl_ops;
-		else
-			tmp_inode->i_fop = &cifs_file_ops;
-
-		if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
-		   (cifs_sb->tcon->ses->server->maxBuf <
-			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-		else
-			tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
-		if (isNewInode)
-			return; /* No sense invalidating pages for new inode
-				   since we have not started caching readahead
-				   file data for it yet */
-
-		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-			(local_size == tmp_inode->i_size)) {
-			cFYI(1, ("inode exists but unchanged"));
-		} else {
-			/* file may have changed on server */
-			cFYI(1, ("invalidate inode, readdir detected change"));
-			invalidate_remote_inode(tmp_inode);
-		}
-	} else if (S_ISDIR(tmp_inode->i_mode)) {
-		cFYI(1, ("Directory inode"));
-		tmp_inode->i_op = &cifs_dir_inode_ops;
-		tmp_inode->i_fop = &cifs_dir_ops;
-	} else if (S_ISLNK(tmp_inode->i_mode)) {
-		cFYI(1, ("Symbolic Link inode"));
-		tmp_inode->i_op = &cifs_symlink_inode_ops;
-/* tmp_inode->i_fop = *//* do not need to set to anything */
-	} else {
-		cFYI(1, ("Special inode"));
-		init_special_inode(tmp_inode, tmp_inode->i_mode,
-				   tmp_inode->i_rdev);
-	}
-}
-
 /* BB eventually need to add the following helper function to
       resolve NT_STATUS_STOPPED_ON_SYMLINK return code when
       we try to do FindFirst on (NTFS) directory symlinks */
@@ -872,7 +786,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 			len = strnlen(filename, PATH_MAX);
 		}
 
-		*pinum = le64_to_cpu(pFindData->UniqueId);
+		*pinum = le64_to_cpu(pFindData->basic.UniqueId);
 	} else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
 		FILE_DIRECTORY_INFO *pFindData =
 			(FILE_DIRECTORY_INFO *)current_entry;
@@ -934,9 +848,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	struct cifsFileInfo *pCifsF;
 	unsigned int obj_type;
 	__u64  inum;
+	ino_t  ino;
 	struct cifs_sb_info *cifs_sb;
 	struct inode *tmp_inode;
 	struct dentry *tmp_dentry;
+	struct cifs_fattr fattr;
 
 	/* get filename and len into qstring */
 	/* get dentry */
@@ -967,39 +883,49 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 		return rc;
 
 	/* only these two infolevels return valid inode numbers */
-	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX ||
-	    pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
-		rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
-					&inum);
-	else
-		rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
-					NULL);
+	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
+		cifs_unix_basic_to_fattr(&fattr,
+				 &((FILE_UNIX_INFO *) pfindEntry)->basic,
+				 cifs_sb);
+		tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring,
+						 &fattr);
+		obj_type = fattr.cf_dtype;
+		ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
+	} else {
+		if (pCifsF->srch_inf.info_level ==
+		    SMB_FIND_FILE_ID_FULL_DIR_INFO)
+			rc = construct_dentry(&qstring, file, &tmp_inode,
+						&tmp_dentry, &inum);
+		else
+			rc = construct_dentry(&qstring, file, &tmp_inode,
+						&tmp_dentry, NULL);
 
-	if ((tmp_inode == NULL) || (tmp_dentry == NULL))
-		return -ENOMEM;
+		if ((tmp_inode == NULL) || (tmp_dentry == NULL)) {
+			rc = -ENOMEM;
+			goto out;
+		}
 
-	/* we pass in rc below, indicating whether it is a new inode,
-	   so we can figure out whether to invalidate the inode cached
-	   data if the file has changed */
-	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
-		unix_fill_in_inode(tmp_inode,
-				   (FILE_UNIX_INFO *)pfindEntry,
-				   &obj_type, rc);
-	else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
-		fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */,
-				pfindEntry, &obj_type, rc);
-	else
-		fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc);
+		/* we pass in rc below, indicating whether it is a new inode,
+		 * so we can figure out whether to invalidate the inode cached
+		 * data if the file has changed
+		 */
+		if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
+			fill_in_inode(tmp_inode, 0, pfindEntry, &obj_type, rc);
+		else
+			fill_in_inode(tmp_inode, 1, pfindEntry, &obj_type, rc);
 
-	if (rc) /* new inode - needs to be tied to dentry */ {
-		d_instantiate(tmp_dentry, tmp_inode);
-		if (rc == 2)
-			d_rehash(tmp_dentry);
-	}
+		/* new inode - needs to be tied to dentry */
+		if (rc) {
+			d_instantiate(tmp_dentry, tmp_inode);
+			if (rc == 2)
+				d_rehash(tmp_dentry);
+		}
 
+		ino = cifs_uniqueid_to_ino_t(tmp_inode->i_ino);
+	}
 
 	rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
-		     tmp_inode->i_ino, obj_type);
+		     ino, obj_type);
 	if (rc) {
 		cFYI(1, ("filldir rc = %d", rc));
 		/* we can not return filldir errors to the caller
@@ -1008,6 +934,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 		rc = -EOVERFLOW;
 	}
 
+out:
 	dput(tmp_dentry);
 	return rc;
 }
-- 
cgit v1.2.3


From bdae997f44535ac4ebe1e055ffe59eeee946f453 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Wed, 1 Jul 2009 21:56:38 -0700
Subject: fs/notify/inotify: decrement user inotify count on close

The per-user inotify_devs value is incremented each time a new file is
allocated, but never decremented. This led to inotify_init failing after a
limited number of calls.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ff231ad23895..ff27a2965844 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
+	struct user_struct *user = group->inotify_data.user;
 
 	fsnotify_clear_marks_by_group(group);
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_put_group(group);
 
+	atomic_dec(&user->inotify_devs);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From f597bb19ccd034cbcf05e1194238e2c8d9505a8a Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Sat, 27 Jun 2009 21:06:22 -0400
Subject: Btrfs: don't log the inode in file_write while growing the file

---
 fs/btrfs/file.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..7c3cd248d8d6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	}
 	if (end_pos > isize) {
 		i_size_write(inode, end_pos);
-		btrfs_update_inode(trans, root, inode);
+		/* we've only changed i_size in ram, and we haven't updated
+		 * the disk i_size.  There is no need to log the inode
+		 * at this time.
+		 */
 	}
 	err = btrfs_end_transaction(trans, root);
 out_unlock:
-- 
cgit v1.2.3


From c8a894d77de4a1e0a544577fd4eabc9aacd453a8 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Sat, 27 Jun 2009 21:07:03 -0400
Subject: Btrfs: fix the file clone ioctl for preallocated extents

---
 fs/btrfs/ioctl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..9f4db848db10 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 						struct btrfs_file_extent_item);
 			comp = btrfs_file_extent_compression(leaf, extent);
 			type = btrfs_file_extent_type(leaf, extent);
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				disko = btrfs_file_extent_disk_bytenr(leaf,
 								      extent);
 				diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			new_key.objectid = inode->i_ino;
 			new_key.offset = key.offset + destoff - off;
 
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				ret = btrfs_insert_empty_item(trans, root, path,
 							      &new_key, size);
 				if (ret)
-- 
cgit v1.2.3


From a970b0a16cc416a509d5ae8b1d70978664e6f4fe Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Sat, 27 Jun 2009 21:07:34 -0400
Subject: Btrfs: account for space we may use in fallocate

Using Eric Sandeen's xfstest for fallocate, you can easily trigger a ENOSPC
panic on btrfs.  This is because we do not account for data we may use when
doing the fallocate.  This patch fixes the problem by properly reserving space,
and then just freeing it when we are done.  The reservation stuff was made with
delalloc in mind, so its a little crude for this case, but it keeps the box
from panicing.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b68330f8585..1eacc78f6614 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5103,6 +5103,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
 	int ret;
 
 	alloc_start = offset & ~mask;
@@ -5121,6 +5122,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 			goto out;
 	}
 
+	root = BTRFS_I(inode)->root;
+
+	ret = btrfs_check_data_free_space(root, inode,
+					  alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
 	locked_end = alloc_end - 1;
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
@@ -5128,7 +5136,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
 		if (!trans) {
 			ret = -EIO;
-			goto out;
+			goto out_free;
 		}
 
 		/* the extent lock is ordered inside the running
@@ -5189,6 +5197,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		      GFP_NOFS);
 
 	btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+out_free:
+	btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
 out:
 	mutex_unlock(&inode->i_mutex);
 	return ret;
-- 
cgit v1.2.3


From 2c47e605a91dde6b0514f689645e7ab336c8592a Mon Sep 17 00:00:00 2001
From: Yan Zheng <zheng.yan@oracle.com>
Date: Sat, 27 Jun 2009 21:07:35 -0400
Subject: Btrfs: update backrefs while dropping snapshot

The new backref format has restriction on type of backref item.  If a tree
block isn't referenced by its owner tree, full backrefs must be used for the
pointers in it. When a tree block loses its owner tree's reference, backrefs
for the pointers in it should be updated to full backrefs. Current
btrfs_drop_snapshot misses the code that updates backrefs, so it's unsafe for
general use.

This patch adds backrefs update code to btrfs_drop_snapshot.  It isn't a
problem in the restricted form btrfs_drop_snapshot is used today, but for
general snapshot deletion this update is required.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h       |   3 +-
 fs/btrfs/extent-tree.c | 564 ++++++++++++++++++++++++++++++++++---------------
 fs/btrfs/relocation.c  |   5 +-
 fs/btrfs/transaction.c |   4 +-
 4 files changed, 395 insertions(+), 181 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..a404ecc53eb1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2076,8 +2076,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root);
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..cd64cfc14f7f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
 	return type;
 }
 
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key)
 
 {
-	int level;
-	BUG_ON(!path->keep_locks);
-	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+	for (; level < BTRFS_MAX_LEVEL; level++) {
 		if (!path->nodes[level])
 			break;
-		btrfs_assert_tree_locked(path->nodes[level]);
 		if (path->slots[level] + 1 >=
 		    btrfs_header_nritems(path->nodes[level]))
 			continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		 * For simplicity, we just do not add new inline back
 		 * ref if there is any kind of item for this block
 		 */
-		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		if (find_next_key(path, 0, &key) == 0 &&
+		    key.objectid == bytenr &&
 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
 			err = -EAGAIN;
 			goto out;
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 	return buf;
 }
 
+#if 0
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf)
 {
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-#if 0
-
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
 					struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
 }
 #endif
 
+struct walk_control {
+	u64 refs[BTRFS_MAX_LEVEL];
+	u64 flags[BTRFS_MAX_LEVEL];
+	struct btrfs_key update_progress;
+	int stage;
+	int level;
+	int shared_level;
+	int update_ref;
+	int keep_locks;
+};
+
+#define DROP_REFERENCE	1
+#define UPDATE_BACKREF	2
+
 /*
- * helper function for drop_subtree, this function is similar to
- * walk_down_tree. The main difference is that it checks reference
- * counts while tree blocks are locked.
+ * hepler to process tree block while walking down the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block. if the block is shared and
+ * we need update back refs for the subtree rooted at the
+ * block, this function changes wc->stage to UPDATE_BACKREF
+ *
+ * when wc->stage == UPDATE_BACKREF, this function updates
+ * back refs for pointers in the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
  */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
-				   struct btrfs_path *path, int *level)
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
 {
-	struct extent_buffer *next;
-	struct extent_buffer *cur;
-	struct extent_buffer *parent;
-	u64 bytenr;
-	u64 ptr_gen;
-	u64 refs;
-	u64 flags;
-	u32 blocksize;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	struct btrfs_key key;
+	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 	int ret;
 
-	cur = path->nodes[*level];
-	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
-				       &refs, &flags);
-	BUG_ON(ret);
-	if (refs > 1)
-		goto out;
+	if (wc->stage == UPDATE_BACKREF &&
+	    btrfs_header_owner(eb) != root->root_key.objectid)
+		return 1;
 
-	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/*
+	 * when reference count of tree block is 1, it won't increase
+	 * again. once full backref flag is set, we never clear it.
+	 */
+	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_lookup_extent_info(trans, root,
+					       eb->start, eb->len,
+					       &wc->refs[level],
+					       &wc->flags[level]);
+		BUG_ON(ret);
+		BUG_ON(wc->refs[level] == 0);
+	}
 
-	while (*level >= 0) {
-		cur = path->nodes[*level];
-		if (*level == 0) {
-			ret = btrfs_drop_leaf_ref(trans, root, cur);
-			BUG_ON(ret);
-			clean_tree_block(trans, root, cur);
-			break;
-		}
-		if (path->slots[*level] >= btrfs_header_nritems(cur)) {
-			clean_tree_block(trans, root, cur);
-			break;
+	if (wc->stage == DROP_REFERENCE &&
+	    wc->update_ref && wc->refs[level] > 1) {
+		BUG_ON(eb == root->node);
+		BUG_ON(path->slots[level] > 0);
+		if (level == 0)
+			btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
+		else
+			btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
+		if (btrfs_header_owner(eb) == root->root_key.objectid &&
+		    btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
+			wc->stage = UPDATE_BACKREF;
+			wc->shared_level = level;
 		}
+	}
 
-		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-		blocksize = btrfs_level_size(root, *level - 1);
-		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->refs[level] > 1)
+			return 1;
 
-		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-		btrfs_tree_lock(next);
-		btrfs_set_lock_blocking(next);
+		if (path->locks[level] && !wc->keep_locks) {
+			btrfs_tree_unlock(eb);
+			path->locks[level] = 0;
+		}
+		return 0;
+	}
 
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &refs, &flags);
+	/* wc->stage == UPDATE_BACKREF */
+	if (!(wc->flags[level] & flag)) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_inc_ref(trans, root, eb, 1);
 		BUG_ON(ret);
-		if (refs > 1) {
-			parent = path->nodes[*level];
-			ret = btrfs_free_extent(trans, root, bytenr,
-						blocksize, parent->start,
-						btrfs_header_owner(parent),
-						*level - 1, 0);
+		ret = btrfs_dec_ref(trans, root, eb, 0);
+		BUG_ON(ret);
+		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
+						  eb->len, flag, 0);
+		BUG_ON(ret);
+		wc->flags[level] |= flag;
+	}
+
+	/*
+	 * the block is shared by multiple trees, so it's not good to
+	 * keep the tree lock
+	 */
+	if (path->locks[level] && level > 0) {
+		btrfs_tree_unlock(eb);
+		path->locks[level] = 0;
+	}
+	return 0;
+}
+
+/*
+ * hepler to process tree block while walking up the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function drops
+ * reference count on the block.
+ *
+ * when wc->stage == UPDATE_BACKREF, this function changes
+ * wc->stage back to DROP_REFERENCE if we changed wc->stage
+ * to UPDATE_BACKREF previously while processing the block.
+ *
+ * NOTE: return value 1 means we should stop walking up.
+ */
+static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct walk_control *wc)
+{
+	int ret = 0;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	u64 parent = 0;
+
+	if (wc->stage == UPDATE_BACKREF) {
+		BUG_ON(wc->shared_level < level);
+		if (level < wc->shared_level)
+			goto out;
+
+		BUG_ON(wc->refs[level] <= 1);
+		ret = find_next_key(path, level + 1, &wc->update_progress);
+		if (ret > 0)
+			wc->update_ref = 0;
+
+		wc->stage = DROP_REFERENCE;
+		wc->shared_level = -1;
+		path->slots[level] = 0;
+
+		/*
+		 * check reference count again if the block isn't locked.
+		 * we should start walking down the tree again if reference
+		 * count is one.
+		 */
+		if (!path->locks[level]) {
+			BUG_ON(level == 0);
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						       eb->start, eb->len,
+						       &wc->refs[level],
+						       &wc->flags[level]);
 			BUG_ON(ret);
-			path->slots[*level]++;
-			btrfs_tree_unlock(next);
-			free_extent_buffer(next);
-			continue;
+			BUG_ON(wc->refs[level] == 0);
+			if (wc->refs[level] == 1) {
+				btrfs_tree_unlock(eb);
+				path->locks[level] = 0;
+				return 1;
+			}
+		} else {
+			BUG_ON(level != 0);
 		}
+	}
 
-		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/* wc->stage == DROP_REFERENCE */
+	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
 
-		*level = btrfs_header_level(next);
-		path->nodes[*level] = next;
-		path->slots[*level] = 0;
-		path->locks[*level] = 1;
-		cond_resched();
+	if (wc->refs[level] == 1) {
+		if (level == 0) {
+			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+				ret = btrfs_dec_ref(trans, root, eb, 1);
+			else
+				ret = btrfs_dec_ref(trans, root, eb, 0);
+			BUG_ON(ret);
+		}
+		/* make block locked assertion in clean_tree_block happy */
+		if (!path->locks[level] &&
+		    btrfs_header_generation(eb) == trans->transid) {
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+		}
+		clean_tree_block(trans, root, eb);
+	}
+
+	if (eb == root->node) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = eb->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(eb));
+	} else {
+		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = path->nodes[level + 1]->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level + 1]));
 	}
-out:
-	if (path->nodes[*level] == root->node)
-		parent = path->nodes[*level];
-	else
-		parent = path->nodes[*level + 1];
-	bytenr = path->nodes[*level]->start;
-	blocksize = path->nodes[*level]->len;
 
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
-				btrfs_header_owner(parent), *level, 0);
+	ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
+				root->root_key.objectid, level, 0);
 	BUG_ON(ret);
+out:
+	wc->refs[level] = 0;
+	wc->flags[level] = 0;
+	return ret;
+}
+
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
+{
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	u64 bytenr;
+	u64 ptr_gen;
+	u32 blocksize;
+	int level = wc->level;
+	int ret;
+
+	while (level >= 0) {
+		cur = path->nodes[level];
+		BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
 
-	if (path->locks[*level]) {
-		btrfs_tree_unlock(path->nodes[*level]);
-		path->locks[*level] = 0;
+		ret = walk_down_proc(trans, root, path, wc);
+		if (ret > 0)
+			break;
+
+		if (level == 0)
+			break;
+
+		bytenr = btrfs_node_blockptr(cur, path->slots[level]);
+		blocksize = btrfs_level_size(root, level - 1);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
+
+		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		btrfs_tree_lock(next);
+		btrfs_set_lock_blocking(next);
+
+		level--;
+		BUG_ON(level != btrfs_header_level(next));
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		path->locks[level] = 1;
+		wc->level = level;
 	}
-	free_extent_buffer(path->nodes[*level]);
-	path->nodes[*level] = NULL;
-	*level += 1;
-	cond_resched();
 	return 0;
 }
 
-/*
- * helper for dropping snapshots.  This walks back up the tree in the path
- * to find the first node higher up where we haven't yet gone through
- * all the slots
- */
 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 int *level, int max_level)
+				 struct walk_control *wc, int max_level)
 {
-	struct btrfs_root_item *root_item = &root->root_item;
-	int i;
-	int slot;
+	int level = wc->level;
 	int ret;
 
-	for (i = *level; i < max_level && path->nodes[i]; i++) {
-		slot = path->slots[i];
-		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-			/*
-			 * there is more work to do in this level.
-			 * Update the drop_progress marker to reflect
-			 * the work we've done so far, and then bump
-			 * the slot number
-			 */
-			path->slots[i]++;
-			WARN_ON(*level == 0);
-			if (max_level == BTRFS_MAX_LEVEL) {
-				btrfs_node_key(path->nodes[i],
-					       &root_item->drop_progress,
-					       path->slots[i]);
-				root_item->drop_level = i;
-			}
-			*level = i;
+	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
+	while (level < max_level && path->nodes[level]) {
+		wc->level = level;
+		if (path->slots[level] + 1 <
+		    btrfs_header_nritems(path->nodes[level])) {
+			path->slots[level]++;
 			return 0;
 		} else {
-			struct extent_buffer *parent;
-
-			/*
-			 * this whole node is done, free our reference
-			 * on it and go up one level
-			 */
-			if (path->nodes[*level] == root->node)
-				parent = path->nodes[*level];
-			else
-				parent = path->nodes[*level + 1];
+			ret = walk_up_proc(trans, root, path, wc);
+			if (ret > 0)
+				return 0;
 
-			clean_tree_block(trans, root, path->nodes[i]);
-			ret = btrfs_free_extent(trans, root,
-						path->nodes[i]->start,
-						path->nodes[i]->len,
-						parent->start,
-						btrfs_header_owner(parent),
-						*level, 0);
-			BUG_ON(ret);
-			if (path->locks[*level]) {
-				btrfs_tree_unlock(path->nodes[i]);
-				path->locks[i] = 0;
+			if (path->locks[level]) {
+				btrfs_tree_unlock(path->nodes[level]);
+				path->locks[level] = 0;
 			}
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-			*level = i + 1;
+			free_extent_buffer(path->nodes[level]);
+			path->nodes[level] = NULL;
+			level++;
 		}
 	}
 	return 1;
 }
 
 /*
- * drop the reference count on the tree rooted at 'snap'.  This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
+ * drop a subvolume tree.
+ *
+ * this function traverses the tree freeing any blocks that only
+ * referenced by the tree.
+ *
+ * when a shared tree block is found. this function decreases its
+ * reference count by one. if update_ref is true, this function
+ * also make sure backrefs for the shared block and all lower level
+ * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root)
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
 {
-	int ret = 0;
-	int wret;
-	int level;
 	struct btrfs_path *path;
-	int update_count;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_root_item *root_item = &root->root_item;
+	struct walk_control *wc;
+	struct btrfs_key key;
+	int err = 0;
+	int ret;
+	int level;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
-	level = btrfs_header_level(root->node);
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
+	trans = btrfs_start_transaction(tree_root, 1);
+
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		level = btrfs_header_level(root->node);
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
 		path->locks[level] = 1;
+		memset(&wc->update_progress, 0,
+		       sizeof(wc->update_progress));
 	} else {
-		struct btrfs_key key;
-		struct btrfs_disk_key found_key;
-		struct extent_buffer *node;
-
 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+		memcpy(&wc->update_progress, &key,
+		       sizeof(wc->update_progress));
+
 		level = root_item->drop_level;
+		BUG_ON(level == 0);
 		path->lowest_level = level;
-		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (wret < 0) {
-			ret = wret;
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		path->lowest_level = 0;
+		if (ret < 0) {
+			err = ret;
 			goto out;
 		}
-		node = path->nodes[level];
-		btrfs_node_key(node, &found_key, path->slots[level]);
-		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
-			       sizeof(found_key)));
+		btrfs_node_key_to_cpu(path->nodes[level], &key,
+				      path->slots[level]);
+		WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+
 		/*
 		 * unlock our path, this is safe because only this
 		 * function is allowed to delete this snapshot
 		 */
 		btrfs_unlock_up_safe(path, 0);
+
+		level = btrfs_header_level(root->node);
+		while (1) {
+			btrfs_tree_lock(path->nodes[level]);
+			btrfs_set_lock_blocking(path->nodes[level]);
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						path->nodes[level]->start,
+						path->nodes[level]->len,
+						&wc->refs[level],
+						&wc->flags[level]);
+			BUG_ON(ret);
+			BUG_ON(wc->refs[level] == 0);
+
+			if (level == root_item->drop_level)
+				break;
+
+			btrfs_tree_unlock(path->nodes[level]);
+			WARN_ON(wc->refs[level] != 1);
+			level--;
+		}
 	}
+
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = update_ref;
+	wc->keep_locks = 0;
+
 	while (1) {
-		unsigned long update;
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret > 0)
+		ret = walk_down_tree(trans, root, path, wc);
+		if (ret < 0) {
+			err = ret;
 			break;
-		if (wret < 0)
-			ret = wret;
+		}
 
-		wret = walk_up_tree(trans, root, path, &level,
-				    BTRFS_MAX_LEVEL);
-		if (wret > 0)
+		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
+		if (ret < 0) {
+			err = ret;
 			break;
-		if (wret < 0)
-			ret = wret;
-		if (trans->transaction->in_commit ||
-		    trans->transaction->delayed_refs.flushing) {
-			ret = -EAGAIN;
+		}
+
+		if (ret > 0) {
+			BUG_ON(wc->stage != DROP_REFERENCE);
 			break;
 		}
-		for (update_count = 0; update_count < 16; update_count++) {
+
+		if (wc->stage == DROP_REFERENCE) {
+			level = wc->level;
+			btrfs_node_key(path->nodes[level],
+				       &root_item->drop_progress,
+				       path->slots[level]);
+			root_item->drop_level = level;
+		}
+
+		BUG_ON(wc->level == 0);
+		if (trans->transaction->in_commit ||
+		    trans->transaction->delayed_refs.flushing) {
+			ret = btrfs_update_root(trans, tree_root,
+						&root->root_key,
+						root_item);
+			BUG_ON(ret);
+
+			btrfs_end_transaction(trans, tree_root);
+			trans = btrfs_start_transaction(tree_root, 1);
+		} else {
+			unsigned long update;
 			update = trans->delayed_ref_updates;
 			trans->delayed_ref_updates = 0;
 			if (update)
-				btrfs_run_delayed_refs(trans, root, update);
-			else
-				break;
+				btrfs_run_delayed_refs(trans, tree_root,
+						       update);
 		}
 	}
+	btrfs_release_path(root, path);
+	BUG_ON(err);
+
+	ret = btrfs_del_root(trans, tree_root, &root->root_key);
+	BUG_ON(ret);
+
+	free_extent_buffer(root->node);
+	free_extent_buffer(root->commit_root);
+	kfree(root);
 out:
+	btrfs_end_transaction(trans, tree_root);
+	kfree(wc);
 	btrfs_free_path(path);
-	return ret;
+	return err;
 }
 
+/*
+ * drop subtree rooted at tree block 'node'.
+ *
+ * NOTE: this function will unlock and release tree block 'node'
+ */
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
 			struct extent_buffer *parent)
 {
 	struct btrfs_path *path;
+	struct walk_control *wc;
 	int level;
 	int parent_level;
 	int ret = 0;
 	int wret;
 
+	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
 	btrfs_assert_tree_locked(parent);
 	parent_level = btrfs_header_level(parent);
 	extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 
 	btrfs_assert_tree_locked(node);
 	level = btrfs_header_level(node);
-	extent_buffer_get(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
+	path->locks[level] = 1;
+
+	wc->refs[parent_level] = 1;
+	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = 0;
+	wc->keep_locks = 1;
 
 	while (1) {
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret < 0)
+		wret = walk_down_tree(trans, root, path, wc);
+		if (wret < 0) {
 			ret = wret;
-		if (wret != 0)
 			break;
+		}
 
-		wret = walk_up_tree(trans, root, path, &level, parent_level);
+		wret = walk_up_tree(trans, root, path, wc, parent_level);
 		if (wret < 0)
 			ret = wret;
 		if (wret != 0)
 			break;
 	}
 
+	kfree(wc);
 	btrfs_free_path(path);
 	return ret;
 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..008397934778 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
 		btrfs_end_transaction(trans, root);
 	}
 
-	btrfs_drop_dead_root(reloc_root);
+	btrfs_drop_snapshot(reloc_root, 0);
 
 	if (atomic_dec_and_test(async->num_pending))
 		complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
 			BUG_ON(ret);
-
-			btrfs_tree_unlock(eb);
-			free_extent_buffer(eb);
 		}
 		if (!lowest) {
 			btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..2dbf1c1f56ee 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 	return 0;
 }
 
+#if 0
 /*
  * when dropping snapshots, we generate a ton of delayed refs, and it makes
  * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
 	btrfs_btree_balance_dirty(tree_root, nr);
 	return ret;
 }
+#endif
 
 /*
  * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 	while (!list_empty(&list)) {
 		root = list_entry(list.next, struct btrfs_root, root_list);
 		list_del_init(&root->root_list);
-		btrfs_drop_dead_root(root);
+		btrfs_drop_snapshot(root, 0);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 9427216476d4fa75103f39d4b228c47d56ba20da Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 2 Jul 2009 12:26:06 -0400
Subject: Btrfs: honor nodatacow/sum mount options for new files

The btrfs attr patches unconditionally inherited the inode flags field
without honoring nodatacow and nodatasum.  This fix makes sure
we properly record the nodatacow/sum mount options in new inodes.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1eacc78f6614..a48c084f6d3a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3587,12 +3587,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 		owner = 1;
 	BTRFS_I(inode)->block_group =
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
-	if ((mode & S_IFREG)) {
-		if (btrfs_test_opt(root, NODATASUM))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-		if (btrfs_test_opt(root, NODATACOW))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
-	}
 
 	key[0].objectid = objectid;
 	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3647,6 +3641,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
 	btrfs_inherit_iflags(inode, dir);
 
+	if ((mode & S_IFREG)) {
+		if (btrfs_test_opt(root, NODATASUM))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+		if (btrfs_test_opt(root, NODATACOW))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+	}
+
 	insert_inode_hash(inode);
 	inode_tree_add(inode);
 	return inode;
-- 
cgit v1.2.3


From 9b627e9bf49ebfeb060dfae0435bdba06cf27cb8 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 2 Jul 2009 13:50:58 -0400
Subject: Btrfs: fix use after free in btrfs_start_workers fail path

worker memory is already freed on one fail path in btrfs_start_workers,
but is still dereferenced. Switch the dereference and kfree.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/async-thread.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..6e4f6c50a120 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 					   "btrfs-%s-%d", workers->name,
 					   workers->num_workers + i);
 		if (IS_ERR(worker->task)) {
-			kfree(worker);
 			ret = PTR_ERR(worker->task);
+			kfree(worker);
 			goto fail;
 		}
 
-- 
cgit v1.2.3


From 68f5a38c3ea4ae9cc7a40f86ff6d6d031583d93a Mon Sep 17 00:00:00 2001
From: Hu Tao <cnhutao@gmail.com>
Date: Thu, 2 Jul 2009 13:55:45 -0400
Subject: Btrfs: fix error message formatting

Make an error msg look nicer by inserting a space between number and word.

Signed-off-by: Hu Tao <hu.taoo@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd64cfc14f7f..a5aca3997d42 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2696,7 +2696,7 @@ again:
 
 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
 		       ", %llu bytes_used, %llu bytes_reserved, "
-		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
 		       "%llu total\n", (unsigned long long)bytes,
 		       (unsigned long long)data_sinfo->bytes_delalloc,
 		       (unsigned long long)data_sinfo->bytes_used,
-- 
cgit v1.2.3


From 033a666ccb842ab4134fcd0c861d5ba9f5d6bf3a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Jul 2009 14:35:32 +0100
Subject: NFSD: Don't hold unrefcounted creds over call to nfsd_setuser()

nfsd_open() gets an unrefcounted pointer to the current process's effective
credentials at the top of the function, then calls nfsd_setuser() via
fh_verify() - which may replace and destroy the current process's effective
credentials - and then passes the unrefcounted pointer to dentry_open() - but
the credentials may have been destroyed by this point.

Instead, the value from current_cred() should be passed directly to
dentry_open() as one of its arguments, rather than being cached in a variable.

Possibly fh_verify() should return the creds to use.

This is a regression introduced by
745ca2475a6ac596e3d8d37c2759c0fbe2586227 "CRED: Pass credentials through
dentry_open()".

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-and-Verified-By: Steve Dickson <steved@redhat.com>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/vfs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4145083dcf88..23341c1063bc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -678,7 +678,6 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			int access, struct file **filp)
 {
-	const struct cred *cred = current_cred();
 	struct dentry	*dentry;
 	struct inode	*inode;
 	int		flags = O_RDONLY|O_LARGEFILE;
@@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 		vfs_dq_init(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-			    flags, cred);
+			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
 	else
-- 
cgit v1.2.3


From 0cfae3d8795f388f9de78adb0171520d19da77e9 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Thu, 18 Jun 2009 11:42:53 +0900
Subject: nilfs2: remove unlikely directive causing mis-conversion of error
 code

The following error code handling in nilfs_segctor_write() function
wrongly converted negative error codes to a truth value (i.e. 1):

   err = unlikely(err) ? : res;

which originaly meant to be

   err = err ? : res;

This mis-conversion caused that write or sync functions receive the
unexpected error code.  This fixes the bug by removing the unlikely
directive.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: stable@kernel.org
---
 fs/nilfs2/segment.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index aa977549919e..c1824915c1c7 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1829,8 +1829,8 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
 		err = nilfs_segbuf_write(segbuf, &wi);
 
 		res = nilfs_segbuf_wait(segbuf, &wi);
-		err = unlikely(err) ? : res;
-		if (unlikely(err))
+		err = err ? : res;
+		if (err)
 			return err;
 	}
 	return 0;
-- 
cgit v1.2.3


From 8227b29722fdbac72357aae155d171a5c777670c Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Thu, 18 Jun 2009 23:52:23 +0900
Subject: nilfs2: fix hang problem of log writer which occurs after write
 failures

Leandro Lucarella gave me a report that nilfs gets stuck after its
write function fails.

The problem turned out to be caused by bugs which leave writeback flag
on pages.  This fixes the problem by ensuring to clear the writeback
flag in error path.

Reported-by: Leandro Lucarella <llucax@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: stable@kernel.org
---
 fs/nilfs2/segment.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c1824915c1c7..8b5e4778cf28 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1836,19 +1836,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
 	return 0;
 }
 
-static int nilfs_page_has_uncleared_buffer(struct page *page)
-{
-	struct buffer_head *head, *bh;
-
-	head = bh = page_buffers(page);
-	do {
-		if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
-			return 1;
-		bh = bh->b_this_page;
-	} while (bh != head);
-	return 0;
-}
-
 static void __nilfs_end_page_io(struct page *page, int err)
 {
 	if (!err) {
@@ -1872,12 +1859,11 @@ static void nilfs_end_page_io(struct page *page, int err)
 	if (!page)
 		return;
 
-	if (buffer_nilfs_node(page_buffers(page)) &&
-	    nilfs_page_has_uncleared_buffer(page))
-		/* For b-tree node pages, this function may be called twice
-		   or more because they might be split in a segment.
-		   This check assures that cleanup has been done for all
-		   buffers in a split btnode page. */
+	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page))
+		/*
+		 * For b-tree node pages, this function may be called twice
+		 * or more because they might be split in a segment.
+		 */
 		return;
 
 	__nilfs_end_page_io(page, err);
@@ -1940,7 +1926,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
 			}
 			if (bh->b_page != fs_page) {
 				nilfs_end_page_io(fs_page, err);
-				if (unlikely(fs_page == failed_page))
+				if (fs_page && fs_page == failed_page)
 					goto done;
 				fs_page = bh->b_page;
 			}
-- 
cgit v1.2.3


From 4a52df779700080de4afb0436d9dd9188514a69b Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Thu, 18 Jun 2009 23:53:25 +0900
Subject: nilfs2: fix incorrect KERN_CRIT messages in case of write failures

In case of write-failure retries, the following KERN_CRIT level
messages are mistakenly output by nilfs_dat_commit_start() function:

nilfs_dat_commit_start: vbn = 408463, start = 12506, end = 18446744073709551615, pbn = 530210
nilfs_dat_commit_start: vbn = 408515, start = 12506, end = 18446744073709551615, pbn = 530211
nilfs_dat_commit_start: vbn = 408464, start = 12506, end = 18446744073709551615, pbn = 530212
...

This suppresses these messages.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: stable@kernel.org
---
 fs/nilfs2/dat.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0b2710e2d565..8927ca27e6f7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
-	if (entry->de_blocknr != cpu_to_le64(0) ||
-	    entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
-		printk(KERN_CRIT
-		       "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
-		       __func__, (unsigned long long)req->pr_entry_nr,
-		       (unsigned long long)le64_to_cpu(entry->de_start),
-		       (unsigned long long)le64_to_cpu(entry->de_end),
-		       (unsigned long long)le64_to_cpu(entry->de_blocknr));
-	}
 	entry->de_blocknr = cpu_to_le64(blocknr);
 	kunmap_atomic(kaddr, KM_USER0);
 
-- 
cgit v1.2.3


From ff54de363afa4583e2a6249f25fe21dfaeb11ea2 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Fri, 19 Jun 2009 02:53:56 +0900
Subject: nilfs2: fix lockdep warning between regular file and inode file

This will fix the following false positive of recursive locking which
lockdep has detected:

=============================================
[ INFO: possible recursive locking detected ]
2.6.30-nilfs #42
---------------------------------------------
nilfs_cleanerd/10607 is trying to acquire lock:
 (&bmap->b_sem){++++-.}, at: [<e0d025b7>] nilfs_bmap_lookup_at_level+0x1a/0x74 [nilfs2]

but task is already holding lock:
 (&bmap->b_sem){++++-.}, at: [<e0d024e0>] nilfs_bmap_truncate+0x19/0x6a [nilfs2]
other info that might help us debug this:
2 locks held by nilfs_cleanerd/10607:
 #0:  (&nilfs->ns_segctor_sem){++++.+}, at: [<e0d0d75a>] nilfs_transaction_begin+0xb6/0x10c [nilfs2]
 #1:  (&bmap->b_sem){++++-.}, at: [<e0d024e0>] nilfs_bmap_truncate+0x19/0x6a [nilfs2]

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/bmap.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 36df60b6d8a4..99d58a028b94 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
 }
 
 static struct lock_class_key nilfs_bmap_dat_lock_key;
+static struct lock_class_key nilfs_bmap_mdt_lock_key;
 
 /**
  * nilfs_bmap_read - read a bmap from an inode
@@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
 		bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
 		bmap->b_last_allocated_key = 0;
 		bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
+		lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
 		break;
+	case NILFS_IFILE_INO:
+		lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
+		/* Fall through */
 	default:
 		bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
 		bmap->b_last_allocated_key = 0;
-- 
cgit v1.2.3


From d9a0a345ab7a58a30ec38e5bb7401a28714914d2 Mon Sep 17 00:00:00 2001
From: Jiro SEKIBA <jir@unicus.jp>
Date: Sat, 4 Jul 2009 23:00:53 +0900
Subject: nilfs2: fix disorder in cp count on error during deleting checkpoints

This fixes a bug that checkpoint count gets wrong on errors when
deleting a series of checkpoints.

The count error is persistent since the checkpoint count is stored on
disk.  Some userland programs refer to the count via ioctl, and this
bugfix is needed to prevent malfunction of such programs.

Signed-off-by: Jiro SEKIBA <jir@unicus.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: stable@kernel.org
---
 fs/nilfs2/cpfile.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 7d49813f66d6..aec942cf79e3 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
 		if (ret < 0) {
 			if (ret != -ENOENT)
-				goto out_header;
+				break;
 			/* skip hole */
 			ret = 0;
 			continue;
@@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 					continue;
 				printk(KERN_ERR "%s: cannot delete block\n",
 				       __func__);
-				goto out_header;
+				break;
 			}
 		}
 
@@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		kunmap_atomic(kaddr, KM_USER0);
 	}
 
- out_header:
 	brelse(header_bh);
 
  out_sem:
-- 
cgit v1.2.3


From e3dc5a665d39112e98cfd5bbc7fda2963c00c12c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Mon, 22 Jun 2009 17:31:09 +0300
Subject: UBIFS: fix integer overflow warning

Fix the following warning:

fs/ubifs/io.c: In function 'ubifs_wbuf_init':
fs/ubifs/io.c:860: warning: integer overflow in expression

And limit maximum hrtimer delta to ULONG_MAX because the
argument is 'unsigned long'.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index bc5857199ec2..2d41ae1d6607 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -857,7 +857,9 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 	 * and hard limits.
 	 */
 	hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
-	wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10;
+	wbuf->delta = DEFAULT_WBUF_TIMEOUT_SECS * 1000000000ULL * 2 / 10;
+	if (wbuf->delta > ULONG_MAX)
+		wbuf->delta = ULONG_MAX;
 	wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
 	hrtimer_set_expires_range_ns(&wbuf->timer,  wbuf->softlimit,
 				     wbuf->delta);
-- 
cgit v1.2.3


From 70aee2f153972f70fad5f7025134fec063f9efbe Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 18 Jun 2009 13:37:15 +0300
Subject: UBIFS: improve debugging messaged

1. Make the I/O debugging message print the journal head number.
2. Add prints to timer functions.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2d41ae1d6607..2ef689a9a363 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -297,6 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
 {
 	struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
 
+	dbg_io("jhead %d", wbuf->jhead);
 	wbuf->need_sync = 1;
 	wbuf->c->need_wbuf_sync = 1;
 	ubifs_wake_up_bgt(wbuf->c);
@@ -313,6 +314,9 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 
 	if (!ktime_to_ns(wbuf->softlimit))
 		return;
+	dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
+	       ktime_to_ns(wbuf->softlimit)/USEC_PER_SEC,
+	       (ktime_to_ns(wbuf->softlimit) + wbuf->delta)/USEC_PER_SEC);
 	hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
 			       HRTIMER_MODE_REL);
 }
@@ -349,8 +353,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 		/* Write-buffer is empty or not seeked */
 		return 0;
 
-	dbg_io("LEB %d:%d, %d bytes",
-	       wbuf->lnum, wbuf->offs, wbuf->used);
+	dbg_io("LEB %d:%d, %d bytes, jhead %d",
+	       wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead);
 	ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
 	ubifs_assert(!(wbuf->avail & 7));
 	ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
@@ -399,7 +403,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
 {
 	const struct ubifs_info *c = wbuf->c;
 
-	dbg_io("LEB %d:%d", lnum, offs);
+	dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead);
 	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
 	ubifs_assert(offs >= 0 && offs <= c->leb_size);
 	ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
@@ -506,9 +510,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 	struct ubifs_info *c = wbuf->c;
 	int err, written, n, aligned_len = ALIGN(len, 8), offs;
 
-	dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
-	       dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
-	       wbuf->offs + wbuf->used);
+	dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len,
+	       dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead,
+	       wbuf->lnum, wbuf->offs + wbuf->used);
 	ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
 	ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
 	ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
@@ -533,8 +537,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		memcpy(wbuf->buf + wbuf->used, buf, len);
 
 		if (aligned_len == wbuf->avail) {
-			dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
-				wbuf->offs);
+			dbg_io("flush jhead %d wbuf to LEB %d:%d",
+			       wbuf->jhead, wbuf->lnum, wbuf->offs);
 			err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
 					    wbuf->offs, c->min_io_size,
 					    wbuf->dtype);
@@ -562,7 +566,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 	 * minimal I/O unit. We have to fill and flush write-buffer and switch
 	 * to the next min. I/O unit.
 	 */
-	dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
+	dbg_io("flush jhead %d wbuf to LEB %d:%d",
+	       wbuf->jhead, wbuf->lnum, wbuf->offs);
 	memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
 	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
 			    c->min_io_size, wbuf->dtype);
@@ -695,7 +700,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 	int err, rlen, overlap;
 	struct ubifs_ch *ch = buf;
 
-	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+	dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs,
+	       dbg_ntype(type), len, wbuf->jhead);
 	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
 	ubifs_assert(!(offs & 7) && offs < c->leb_size);
 	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
-- 
cgit v1.2.3


From 0b335b9d7d5f0b832e90ac469480789c07be80ad Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 23 Jun 2009 12:30:43 +0300
Subject: UBIFS: slightly optimize write-buffer timer usage

This patch adds the following minor optimization:

1. If write-buffer does not use the timer, indicate it with the
   wbuf->no_timer variable, instead of using the wbuf->softlimit
   variable. This is better because wbuf->softlimit is of ktime_t
   type, and the ktime_to_ns function contains 64-bit multiplication.

2. Do not call the 'hrtimer_cancel()' function for write-buffers
   which do not use timers.

3. Do not cancel the timer in 'ubifs_put_super()' because the
   synchronization function does this.

This patch also removes a confusing comment.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c    | 9 +++------
 fs/ubifs/super.c | 6 ++----
 fs/ubifs/ubifs.h | 6 ++++--
 3 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2ef689a9a363..9fcf6c38c1bc 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -312,7 +312,7 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 {
 	ubifs_assert(!hrtimer_active(&wbuf->timer));
 
-	if (!ktime_to_ns(wbuf->softlimit))
+	if (wbuf->no_timer)
 		return;
 	dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
 	       ktime_to_ns(wbuf->softlimit)/USEC_PER_SEC,
@@ -327,11 +327,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
  */
 static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 {
-	/*
-	 * If the syncer is waiting for the lock (from the background thread's
-	 * context) and another task is changing write-buffer then the syncing
-	 * should be canceled.
-	 */
+	if (wbuf->no_timer)
+		return;
 	wbuf->need_sync = 0;
 	hrtimer_cancel(&wbuf->timer);
 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 79fad43f3c57..5bb272c56a9b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -797,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c)
 	 * does not need to be synchronized by timer.
 	 */
 	c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
-	c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0);
+	c->jheads[GCHD].wbuf.no_timer = 1;
 
 	return 0;
 }
@@ -1754,10 +1754,8 @@ static void ubifs_put_super(struct super_block *sb)
 
 		/* Synchronize write-buffers */
 		if (c->jheads)
-			for (i = 0; i < c->jhead_cnt; i++) {
+			for (i = 0; i < c->jhead_cnt; i++)
 				ubifs_wbuf_sync(&c->jheads[i].wbuf);
-				hrtimer_cancel(&c->jheads[i].wbuf.timer);
-			}
 
 		/*
 		 * On fatal errors c->ro_media is set to 1, in which case we do
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1bf01d820066..97bc9d09d54b 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -654,7 +654,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
  * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit
  *         and @softlimit + @delta)
  * @timer: write-buffer timer
- * @need_sync: it is set if its timer expired and needs sync
+ * @no_timer: non-zero if this write-buffer does not timer
+ * @need_sync: non-zero if its timer expired and needs sync
  * @next_ino: points to the next position of the following inode number
  * @inodes: stores the inode numbers of the nodes which are in wbuf
  *
@@ -683,7 +684,8 @@ struct ubifs_wbuf {
 	ktime_t softlimit;
 	unsigned long long delta;
 	struct hrtimer timer;
-	int need_sync;
+	unsigned int no_timer:1;
+	unsigned int need_sync:1;
 	int next_ino;
 	ino_t *inodes;
 };
-- 
cgit v1.2.3


From 2a35a3a8ab3e94afd631ed4b45878ceb98f7ab28 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 23 Jun 2009 20:26:33 +0300
Subject: UBIFS: set write-buffer timout to 3-5 seconds

This patch cleans up write-buffer timeout initialization and
sets it to 3-5 interval.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c    | 17 ++++-------------
 fs/ubifs/ubifs.h |  5 +++--
 2 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 9fcf6c38c1bc..48d0af94b26a 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -828,7 +828,6 @@ out:
 int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 {
 	size_t size;
-	ktime_t hardlimit;
 
 	wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
 	if (!wbuf->buf)
@@ -854,18 +853,10 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 
 	hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	wbuf->timer.function = wbuf_timer_callback_nolock;
-	/*
-	 * Make write-buffer soft limit to be 20% of the hard limit. The
-	 * write-buffer timer is allowed to expire any time between the soft
-	 * and hard limits.
-	 */
-	hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
-	wbuf->delta = DEFAULT_WBUF_TIMEOUT_SECS * 1000000000ULL * 2 / 10;
-	if (wbuf->delta > ULONG_MAX)
-		wbuf->delta = ULONG_MAX;
-	wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
-	hrtimer_set_expires_range_ns(&wbuf->timer,  wbuf->softlimit,
-				     wbuf->delta);
+	wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
+	wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
+	wbuf->delta *= 1000000000ULL;
+	ubifs_assert(wbuf->delta <= ULONG_MAX);
 	return 0;
 }
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 97bc9d09d54b..c3a707d458a1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -95,8 +95,9 @@
  */
 #define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
 
-/* Default write-buffer synchronization timeout in seconds */
-#define DEFAULT_WBUF_TIMEOUT_SECS 5
+/* Write-buffer synchronization timeout interval in seconds */
+#define WBUF_TIMEOUT_SOFTLIMIT 3
+#define WBUF_TIMEOUT_HARDLIMIT 5
 
 /* Maximum possible inode number (only 32-bit inodes are supported now) */
 #define MAX_INUM 0xFFFFFFFF
-- 
cgit v1.2.3


From cb54ef8b1304fe25f3d57031e0f85558a043239f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 23 Jun 2009 20:30:32 +0300
Subject: UBIFS: few spelling fixes

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/io.c    | 6 +++---
 fs/ubifs/super.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 48d0af94b26a..7e4267ecfa56 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -391,7 +391,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
  * @offs: logical eraseblock offset to seek to
  * @dtype: data type
  *
- * This function targets the write buffer to logical eraseblock @lnum:@offs.
+ * This function targets the write-buffer to logical eraseblock @lnum:@offs.
  * The write-buffer is synchronized if it is not empty. Returns zero in case of
  * success and a negative error code in case of failure.
  */
@@ -822,7 +822,7 @@ out:
  * @c: UBIFS file-system description object
  * @wbuf: write-buffer to initialize
  *
- * This function initializes write buffer. Returns zero in case of success
+ * This function initializes write-buffer. Returns zero in case of success
  * %-ENOMEM in case of failure.
  */
 int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
@@ -862,7 +862,7 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 
 /**
  * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
- * @wbuf: the write-buffer whereto add
+ * @wbuf: the write-buffer where to add
  * @inum: the inode number
  *
  * This function adds an inode number to the inode array of the write-buffer.
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5bb272c56a9b..76405c457a38 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -986,7 +986,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 		switch (token) {
 		/*
 		 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
-		 * We accepte them in order to be backware-compatible. But this
+		 * We accept them in order to be backward-compatible. But this
 		 * should be removed at some point.
 		 */
 		case Opt_fast_unmount:
-- 
cgit v1.2.3


From 44737589442bf69d811e003d9d0064b8fc1541d6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Wed, 24 Jun 2009 10:15:12 +0300
Subject: UBIFS: fix 64-bit divisions in debug print

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
---
 fs/ubifs/io.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 7e4267ecfa56..762a7d6cec73 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -315,8 +315,9 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 	if (wbuf->no_timer)
 		return;
 	dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
-	       ktime_to_ns(wbuf->softlimit)/USEC_PER_SEC,
-	       (ktime_to_ns(wbuf->softlimit) + wbuf->delta)/USEC_PER_SEC);
+	       div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
+	       div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
+		       USEC_PER_SEC));
 	hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
 			       HRTIMER_MODE_REL);
 }
-- 
cgit v1.2.3


From 681947d2fa1a00629de33c4df3ca72c39f06a14c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Wed, 24 Jun 2009 09:59:38 +0300
Subject: UBIFS: minor spelling and grammar fixes

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
---
 fs/ubifs/replay.c | 2 +-
 fs/ubifs/ubifs.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 11cc80125a49..769be42f39d6 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -957,7 +957,7 @@ out:
 	return err;
 
 out_dump:
-	ubifs_err("log error detected while replying the log at LEB %d:%d",
+	ubifs_err("log error detected while replaying the log at LEB %d:%d",
 		  lnum, offs + snod->offs);
 	dbg_dump_node(c, snod->node);
 	ubifs_scan_destroy(sleb);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index c3a707d458a1..a29349094422 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -655,8 +655,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
  * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit
  *         and @softlimit + @delta)
  * @timer: write-buffer timer
- * @no_timer: non-zero if this write-buffer does not timer
- * @need_sync: non-zero if its timer expired and needs sync
+ * @no_timer: non-zero if this write-buffer does not have a timer
+ * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
  * @next_ino: points to the next position of the following inode number
  * @inodes: stores the inode numbers of the nodes which are in wbuf
  *
-- 
cgit v1.2.3


From 1fb8bd01ed0af0d0577e010e8c6b4234de583fa6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 28 Jun 2009 18:31:58 +0300
Subject: UBIFS: fix assertion warning

When debugging is enabled and an unclean file-system is mounter,
the following assertion is triggered:

UBIFS assert failed in ubifs_tnc_start_commit at 805 (pid 1081)
Call Trace:
[cfaffbd0] [c0006cf8] show_stack+0x44/0x16c (unreliable)
[cfaffc10] [c011b738] ubifs_tnc_start_commit+0xbb8/0xd18
[cfaffc90] [c0112670] do_commit+0x150/0xa44
[cfaffd10] [c0125234] ubifs_rcvry_gc_commit+0xd8/0x544
[cfaffd60] [c0100e9c] ubifs_fill_super+0xe78/0x15f8
[cfaffdf0] [c0102118] ubifs_get_sb+0x20c/0x320
[cfaffe70] [c007f764] vfs_kern_mount+0x58/0xe0
[cfaffe90] [c007f83c] do_kern_mount+0x40/0xf8
[cfaffeb0] [c0095c24] do_mount+0x550/0x758
[cfafff10] [c0095ebc] sys_mount+0x90/0xe0
[cfafff40] [c000ed4c] ret_from_syscall+0x0/0x3c

The reason is that we initialize 'c->min_leb_idx' early, and do
not re-calculate it after journal replay.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 76405c457a38..3507d0ed542d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1287,6 +1287,9 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (err)
 		goto out_journal;
 
+	/* Calculate 'min_idx_lebs' after journal replay */
+	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
 	err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
 	if (err)
 		goto out_orphans;
-- 
cgit v1.2.3


From 7fcd9c3ecbf09c0a77db7ba01aac75b32fb79a93 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 2 Jul 2009 17:15:47 +0200
Subject: UBIFS: allow more than one volume to be mounted

UBIFS uses a bdi device per volume, but does not care to hand out unique
names to each of them. This causes an error when trying to mount more
than one volumes. Append the UBI volume and device ID to avoid that.

[Amended a bit by Artem Bityutskiy]

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Cc: Adrian Hunter <ext-adrian.hunter@nokia.com>
Cc: linux-mtd@lists.infradead.org
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/ubifs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3507d0ed542d..26d2e0d80465 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1976,7 +1976,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 	err  = bdi_init(&c->bdi);
 	if (err)
 		goto out_close;
-	err = bdi_register(&c->bdi, NULL, "ubifs");
+	err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
+			   c->vi.ubi_num, c->vi.vol_id);
 	if (err)
 		goto out_bdi;
 
-- 
cgit v1.2.3


From 3beab0b42413e83a7907db7176b54c840fc75a81 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Sun, 5 Jul 2009 12:08:08 -0700
Subject: sys_sync(): fix 16% performance regression in ffsb create_4k test

I run many ffsb test cases on JBODs (typically 13/12 disks).  Comparing
with kernel 2.6.30, 2.6.31-rc1 has about 16% regression with
ffsb_create_4k.  The sub test case creates files continuously for 10
minitues and every file is 1MB.

Bisect located below patch.

5cee5815d1564bbbd505fea86f4550f1efdb5cd0 is first bad commit
commit 5cee5815d1564bbbd505fea86f4550f1efdb5cd0
Author: Jan Kara <jack@suse.cz>
Date:   Mon Apr 27 16:43:51 2009 +0200

    vfs: Make sys_sync() use fsync_super() (version 4)

    It is unnecessarily fragile to have two places (fsync_super() and do_sync())
    doing data integrity sync of the filesystem. Alter __fsync_super() to
    accommodate needs of both callers and use it. So after this patch
    __fsync_super() is the only place where we gather all the calls needed to
    properly send all data on a filesystem to disk.

As a matter of fact, ffsb calls sys_sync in the end to make sure all data
is flushed to disks and the flushing is counted into the result.  vmstat
shows ffsb is blocked when syncing for a long time.  With 2.6.30, ffsb is
blocked for a short time.

I checked the patch and did experiments to recover the original methods.
Eventually, the root cause is the patch deletes the calling to
wakeup_pdflush when syncing, so only ffsb is blocked on disk I/O.
wakeup_pdflush could ask pdflush to write back pages with ffsb at the
same time.

[akpm@linux-foundation.org: restore comment too]
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/sync.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/sync.c b/fs/sync.c
index dd200025af85..3422ba61d86d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -112,8 +112,13 @@ restart:
 	mutex_unlock(&mutex);
 }
 
+/*
+ * sync everything.  Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
 SYSCALL_DEFINE0(sync)
 {
+	wakeup_pdflush(0);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
-- 
cgit v1.2.3


From 793285fcafce4719a05e0c99fa74b188157fe7fe Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 5 Jul 2009 12:08:26 -0700
Subject: cred_guard_mutex: do not return -EINTR to user-space

do_execve() and ptrace_attach() return -EINTR if
mutex_lock_interruptible(->cred_guard_mutex) fails.

This is not right, change the code to return ERESTARTNOINTR.

Perhaps we should also change proc_pid_attr_write().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c | 4 ++--
 fs/exec.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index cdd51a3a7c53..fbadb947727b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1486,8 +1486,8 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
-	if (retval < 0)
+	retval = -ERESTARTNOINTR;
+	if (mutex_lock_interruptible(&current->cred_guard_mutex))
 		goto out_free;
 	current->in_execve = 1;
 
diff --git a/fs/exec.c b/fs/exec.c
index e639957d7a57..4a8849e45b21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,8 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
-	if (retval < 0)
+	retval = -ERESTARTNOINTR;
+	if (mutex_lock_interruptible(&current->cred_guard_mutex))
 		goto out_free;
 	current->in_execve = 1;
 
-- 
cgit v1.2.3


From d01730d74d2b0155da50d44555001706294014f7 Mon Sep 17 00:00:00 2001
From: Jiaying Zhang <jiayingz@google.com>
Date: Tue, 7 Jul 2009 18:15:21 +0200
Subject: quota: Fix possible deadlock during parallel quotaon and quotaoff

The following test script triggers a deadlock on ext2 filesystem:
while true; do quotaon /dev/hda >&/dev/null; usleep $RANDOM; done &
while true; do quotaoff /dev/hda >&/dev/null; usleep $RANDOM; done &

I found there is a potential deadlock between quotaon and quotaoff (or
quotasync). Basically, all of quotactl operations need to be protected by
dqonoff_mutex. vfs_quota_off and vfs_quota_sync also call sb->s_op->quota_write
that needs to grab the i_mutex of the quota file.  But in vfs_quota_on_inode
(called from quotaon operation), the current code tries to grab  the i_mutex of
the quota file first before getting quonoff_mutex.

Reverse the order in which we take locks in vfs_quota_on_inode().

Jan Kara: Changed changelog to be more readable, made lockdep happy with
  I_MUTEX_QUOTA.

Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 607c579e5eca..70f36c043d62 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2042,8 +2042,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		 * changes */
 		invalidate_bdev(sb->s_bdev);
 	}
-	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
@@ -2094,7 +2094,6 @@ out_file_init:
 	dqopt->files[type] = NULL;
 	iput(inode);
 out_lock:
-	mutex_unlock(&dqopt->dqonoff_mutex);
 	if (oldflags != -1) {
 		down_write(&dqopt->dqptr_sem);
 		/* Set the flags back (in the case of accidental quotaon()
@@ -2104,6 +2103,7 @@ out_lock:
 		up_write(&dqopt->dqptr_sem);
 	}
 	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
 	put_quota_format(fmt);
 
-- 
cgit v1.2.3


From b43f3cbd21ffbd719fd4fa6642bfe6af255ded34 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Jul 2009 01:54:37 +0400
Subject: headers: mnt_namespace.h redux

Fix various silly problems wrt mnt_namespace.h:

 - exit_mnt_ns() isn't used, remove it
 - done that, sched.h and nsproxy.h inclusions aren't needed
 - mount.h inclusion was need for vfsmount_lock, but no longer
 - remove mnt_namespace.h inclusion from files which don't use anything
   from mnt_namespace.h

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/mntpt.c      | 1 -
 fs/namespace.c      | 1 +
 fs/nfs/getroot.c    | 1 -
 fs/reiserfs/super.c | 1 -
 4 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index c52be53f6946..5ffb570cd3a8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -17,7 +17,6 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/mnt_namespace.h>
 #include "internal.h"
 
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dc283fd4716..277c28a63ead 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/mnt_namespace.h>
 #include <linux/namei.h>
+#include <linux/nsproxy.h>
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/ramfs.h>
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 46177cb87064..b35d2a616066 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -30,7 +30,6 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/namei.h>
-#include <linux/mnt_namespace.h>
 #include <linux/security.h>
 
 #include <asm/system.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d3aeb061612b..7adea74d6a8a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -24,7 +24,6 @@
 #include <linux/exportfs.h>
 #include <linux/quotaops.h>
 #include <linux/vfs.h>
-#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
-- 
cgit v1.2.3


From d5ce5b40bc66880d1732461d4b47d7fc3331ed30 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 8 Jul 2009 11:17:34 +0100
Subject: Free the memory allocated by memdup_user() in fs/sysfs/bin.c

Commit 1c8542c7bb replaced kmalloc() with memdup_user() in the write()
function but also dropped the kfree(temp). The memdup_user() function
allocates memory which is never freed.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Parag Warudkar <parag.warudkar@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/sysfs/bin.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 9345806c8853..2524714bece1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -171,6 +171,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 	if (count > 0)
 		*off = offs + count;
 
+	kfree(temp);
 	return count;
 }
 
-- 
cgit v1.2.3


From ad361c9884e809340f6daca80d56a9e9c871690a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 6 Jul 2009 13:05:40 -0700
Subject: Remove multiple KERN_ prefixes from printk formats

Commit 5fd29d6ccbc98884569d6f3105aeca70858b3e0f ("printk: clean up
handling of log-levels and newlines") changed printk semantics.  printk
lines with multiple KERN_<level> prefixes are no longer emitted as
before the patch.

<level> is now included in the output on each additional use.

Remove all uses of multiple KERN_<level>s in formats.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jffs2/erase.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a0244740b75a..b47679be118a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 	D2({
 		int i=0;
 		struct jffs2_raw_node_ref *this;
-		printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG);
+		printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n");
 
 		this = ic->nodes;
 
+		printk(KERN_DEBUG);
 		while(this) {
-			printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this));
+			printk(KERN_CONT "0x%08x(%d)->",
+			       ref_offset(this), ref_flags(this));
 			if (++i == 5) {
-				printk("\n" KERN_DEBUG);
+				printk(KERN_DEBUG);
 				i=0;
 			}
 			this = this->next_in_ino;
 		}
-		printk("\n");
+		printk(KERN_CONT "\n");
 	});
 
 	switch (ic->class) {
-- 
cgit v1.2.3


From 5ddf1e0ff00fd808c048d0b920784828276cc516 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sun, 5 Jul 2009 11:01:02 -0400
Subject: cifs: fix regression with O_EXCL creates and optimize away lookup

cifs: fix regression with O_EXCL creates and optimize away lookup

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Tested-by: Shirish Pargaonkar <shirishp@gmail.com>
CC: Stable Kernel <stable@kernel.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/dir.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index a40054faed7f..ff55fc6932cb 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -643,6 +643,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 			}
 	}
 
+	/*
+	 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
+	 * the VFS handle the create.
+	 */
+	if (nd->flags & LOOKUP_EXCL) {
+		d_instantiate(direntry, NULL);
+		return 0;
+	}
+
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
-- 
cgit v1.2.3


From 086b3640c10ab448a6993c4bae1508f496f530c4 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 29 Jun 2009 16:25:33 +0300
Subject: UBIFS: dump a little more in case of corruptions

In case of corruptions, dump 8192 bytes instead of 4096. The
largest node is 4096+ bytes, so it is better to see a node
boundary, which is not always possible when only 4096 bytes
are printed.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Adrian Hunter <Adrian.Hunter@nokia.com>
---
 fs/ubifs/scan.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 0ed82479b44b..165c14ba1a46 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
 {
 	int len;
 
-	ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
+	ubifs_err("corruption at LEB %d:%d", lnum, offs);
 	if (dbg_failure_mode)
 		return;
 	len = c->leb_size - offs;
-	if (len > 4096)
-		len = 4096;
+	if (len > 8192)
+		len = 8192;
 	dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
 	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
 }
-- 
cgit v1.2.3


From ed43f2f06cc1cec7ec2dc235c908530bc8c796eb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 29 Jun 2009 17:59:23 +0300
Subject: UBIFS: small amendments in the LEB scanning code

This patch fixes few minor things I've spotted while going through
code:

1. Better document return codes
2. If 'ubifs_scan_a_node()' returns some thing we do not expect,
   treat this as an error.
3. Try to do recovery only when 'ubifs_scan()' returns %-EUCLEAN,
   not on any error.
4. If empty space starts at a non-aligned address, print a message.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Adrian Hunter <Adrian.Hunter@nokia.com>
---
 fs/ubifs/recovery.c |  7 ++++---
 fs/ubifs/replay.c   |  7 ++++---
 fs/ubifs/scan.c     | 14 +++++++++-----
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 805605250f12..093a1ecb700f 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -543,8 +543,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
  *
  * This function does a scan of a LEB, but caters for errors that might have
  * been caused by the unclean unmount from which we are attempting to recover.
- *
- * This function returns %0 on success and a negative error code on failure.
+ * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
+ * found, and a negative error code in case of failure.
  */
 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 					 int offs, void *sbuf, int grouped)
@@ -643,7 +643,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 			goto corrupted;
 		default:
 			dbg_err("unknown");
-			goto corrupted;
+			err = -EINVAL;
+			goto error;
 		}
 	}
 
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 769be42f39d6..2970500f32df 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 
 	dbg_mnt("replay log LEB %d:%d", lnum, offs);
 	sleb = ubifs_scan(c, lnum, offs, sbuf);
-	if (IS_ERR(sleb)) {
-		if (c->need_recovery)
-			sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+	if (IS_ERR(sleb) ) {
+		if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
+			return PTR_ERR(sleb);
+		sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
 		if (IS_ERR(sleb))
 			return PTR_ERR(sleb);
 	}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 165c14ba1a46..892ebfee4fe5 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
  * @sbuf: scan buffer (must be c->leb_size)
  *
  * This function scans LEB number @lnum and returns complete information about
- * its contents. Returns an error code in case of failure.
+ * its contents. Returns the scaned information in case of success and,
+ * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
+ * of failure.
  */
 struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 				  int offs, void *sbuf)
@@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 		cond_resched();
 
 		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
-
 		if (ret > 0) {
 			/* Padding bytes or a valid padding node */
 			offs += ret;
@@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 			goto corrupted;
 		default:
 			dbg_err("unknown");
-			goto corrupted;
+			err = -EINVAL;
+			goto error;
 		}
 
 		err = ubifs_add_snod(c, sleb, buf, offs);
@@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 		len -= node_len;
 	}
 
-	if (offs % c->min_io_size)
-		goto corrupted;
+	if (offs % c->min_io_size) {
+		ubifs_err("empty space starts at non-aligned offset %d", offs);
+		goto corrupted;;
+	}
 
 	ubifs_end_scan(c, sleb, lnum, offs);
 
-- 
cgit v1.2.3


From 431102fed3effe4e4e19678830ddab7f05c34bf9 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 29 Jun 2009 18:58:34 +0300
Subject: UBIFS: clean up free space checking

recovery.c has 'is_empty()' helper and it is better to use
this helper instead of re-implementing it in several places.
This patch does this and removes some amount of unneeded code.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Adrian Hunter <Adrian.Hunter@nokia.com>
---
 fs/ubifs/recovery.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 093a1ecb700f..fe7af9f676b0 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -357,11 +357,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 	empty_offs = ALIGN(offs + 1, c->min_io_size);
 	check_len = c->leb_size - empty_offs;
 	p = buf + empty_offs - offs;
-
-	for (; check_len > 0; check_len--)
-		if (*p++ != 0xff)
-			return 0;
-	return 1;
+	return is_empty(p, check_len);
 }
 
 /**
@@ -814,7 +810,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 			void *sbuf)
 {
-	int len, err, need_clean = 0;
+	int len, err;
 
 	if (c->min_io_size > 1)
 		len = c->min_io_size;
@@ -828,19 +824,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 
 	/* Read at the head location and check it is empty flash */
 	err = ubi_read(c->ubi, lnum, sbuf, offs, len);
-	if (err)
-		need_clean = 1;
-	else {
-		uint8_t *p = sbuf;
-
-		while (len--)
-			if (*p++ != 0xff) {
-				need_clean = 1;
-				break;
-			}
-	}
-
-	if (need_clean) {
+	if (err || !is_empty(sbuf, len)) {
 		dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 		if (offs == 0)
 			return ubifs_leb_unmap(c, lnum);
-- 
cgit v1.2.3


From 061125476039a9a998878468a6abe235b1cee347 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 29 Jun 2009 19:27:14 +0300
Subject: UBIFS: fix corruption dump

In the 'ubifs_recover_leb()' function, when we find corrupted
empty space, we dump 8K starting from the offset where the last
node ends. This is OK if the corrupted empty space is somewhere
near that offset. But if the corruption is far at the end of the
LEB, we will dump all 0xFF bytes and complitely ignore the
interesting data. This is observed on a PPC ("kilauea") with
NOR flash.

This patch changes the behavior and teaches UBIFS to print only
interesting data. I.e., now we find where corruption starts and
start dumping from that offset.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Adrian Hunter <Adrian.Hunter@nokia.com>
---
 fs/ubifs/recovery.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index fe7af9f676b0..e5f6cf8a1155 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -52,6 +52,25 @@ static int is_empty(void *buf, int len)
 	return 1;
 }
 
+/**
+ * first_non_ff - find offset of the first non-0xff byte.
+ * @buf: buffer to search in
+ * @len: length of buffer
+ *
+ * This function returns offset of the first non-0xff byte in @buf or %-1 if
+ * the buffer contains only 0xff bytes.
+ */
+static int first_non_ff(void *buf, int len)
+{
+	uint8_t *p = buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (*p++ != 0xff)
+			return i;
+	return -1;
+}
+
 /**
  * get_master_node - get the last valid master node allowing for corruption.
  * @c: UBIFS file-system description object
@@ -649,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 			clean_buf(c, &buf, lnum, &offs, &len);
 			need_clean = 1;
 		} else {
-			ubifs_err("corrupt empty space at LEB %d:%d",
-				  lnum, offs);
+			int corruption = first_non_ff(buf, len);
+
+			ubifs_err("corrupt empty space LEB %d:%d, corruption "
+				  "starts at %d", lnum, offs, corruption);
+			/* Make sure we dump interesting non-0xFF data */
+			offs = corruption;
+			buf += corruption;
 			goto corrupted;
 		}
 	}
-- 
cgit v1.2.3


From c4c1bff64dfff4e6dd0936a0340f56b9284512c8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 20:02:48 -0400
Subject: cifs: add pid of initiating process to spnego upcall info

cifs: add pid of initiating process to spnego upcall info

This will allow the upcall to poke in /proc/<pid>/environ and get
the value of the $KRB5CCNAME env var for the process.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifs_spnego.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4a4581cb2b5e..051caecf7d67 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = {
 /* strlen of ";user=" */
 #define USER_KEY_LEN		6
 
+/* strlen of ";pid=0x" */
+#define PID_KEY_LEN		7
+
 /* get a key struct with a SPNEGO security blob, suitable for session setup */
 struct key *
 cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
@@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 		   IP_KEY_LEN + INET6_ADDRSTRLEN +
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
-		   USER_KEY_LEN + strlen(sesInfo->userName) + 1;
+		   USER_KEY_LEN + strlen(sesInfo->userName) +
+		   PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
 
 	spnego_key = ERR_PTR(-ENOMEM);
 	description = kzalloc(desc_len, GFP_KERNEL);
@@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	dp = description + strlen(description);
 	sprintf(dp, ";user=%s", sesInfo->userName);
 
+	dp = description + strlen(description);
+	sprintf(dp, ";pid=0x%x", current->pid);
+
 	cFYI(1, ("key description = %s", description));
 	spnego_key = request_key(&cifs_spnego_key_type, description, "");
 
-- 
cgit v1.2.3


From 01ea95e3b6b16573a491ef98ad63f7a1bdcb504f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 20:02:49 -0400
Subject: cifs: rename CIFSSMBUnixSetInfo to CIFSSMBUnixSetPathInfo

cifs: rename CIFSSMBUnixSetInfo to CIFSSMBUnixSetPathInfo

...in preparation of adding a SET_FILE_INFO variant.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  2 +-
 fs/cifs/cifssmb.c   |  6 +++---
 fs/cifs/dir.c       | 15 ++++++++-------
 fs/cifs/file.c      |  6 +++---
 fs/cifs/inode.c     | 16 ++++++++--------
 5 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b2bd83fd2aa4..d95fd427de57 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -220,7 +220,7 @@ struct cifs_unix_set_info_args {
 	dev_t	device;
 };
 
-extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon,
 			char *fileName,
 			const struct cifs_unix_set_info_args *args,
 			const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 61007c627497..1cd01ba03656 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5075,9 +5075,9 @@ SetAttrLgcyRetry:
 #endif /* temporarily unneeded SetAttr legacy function */
 
 int
-CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
-		   const struct cifs_unix_set_info_args *args,
-		   const struct nls_table *nls_codepage, int remap)
+CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
+		       const struct cifs_unix_set_info_args *args,
+		       const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
 	TRANSACTION2_SPI_RSP *pSMBr = NULL;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ff55fc6932cb..4326ffd90fa9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -425,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
-			cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+		CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} else {
 		/* BB implement mode setting via Windows security
 		   descriptors e.g. */
@@ -515,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
-			&args, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+		rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
+					    cifs_sb->local_nls,
+					    cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (!rc) {
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 97ce4bf89d15..c34b7f8a217b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -448,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file)
 				.mtime	= NO_CHANGE_64,
 				.device	= 0,
 			};
-			CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
-					    cifs_sb->local_nls,
-					    cifs_sb->mnt_cifs_flags &
+			CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+					       cifs_sb->local_nls,
+					       cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		}
 	}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b22379610d71..ad19007ea05f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1241,10 +1241,10 @@ mkdir_get_info:
 				args.uid = NO_CHANGE_64;
 				args.gid = NO_CHANGE_64;
 			}
-			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
-					    cifs_sb->local_nls,
-					    cifs_sb->mnt_cifs_flags &
-					    CIFS_MOUNT_MAP_SPECIAL_CHR);
+			CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
+					       cifs_sb->local_nls,
+					       cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		} else {
 			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
 			    (mode & S_IWUGO) == 0) {
@@ -1876,10 +1876,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		args->ctime = NO_CHANGE_64;
 
 	args->device = 0;
-	rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
-				cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+	rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
+				    cifs_sb->local_nls,
+				    cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 	if (!rc)
 		rc = inode_setattr(inode, attrs);
-- 
cgit v1.2.3


From 654cf14ac0a71c56c1f0032140c3403382ca076b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 20:02:49 -0400
Subject: cifs: make a separate function for filling out FILE_UNIX_BASIC_INFO

cifs: make a separate function for filling out FILE_UNIX_BASIC_INFO

The SET_FILE_INFO variant will need to do the same thing here. Break
this code out into a separate function that both variants can call.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 74 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1cd01ba03656..1f3c8a463fcd 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5074,6 +5074,47 @@ SetAttrLgcyRetry:
 }
 #endif /* temporarily unneeded SetAttr legacy function */
 
+static void
+cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
+			const struct cifs_unix_set_info_args *args)
+{
+	u64 mode = args->mode;
+
+	/*
+	 * Samba server ignores set of file size to zero due to bugs in some
+	 * older clients, but we should be precise - we use SetFileSize to
+	 * set file size and do not want to truncate file size to zero
+	 * accidently as happened on one Samba server beta by putting
+	 * zero instead of -1 here
+	 */
+	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
+	data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
+	data_offset->LastStatusChange = cpu_to_le64(args->ctime);
+	data_offset->LastAccessTime = cpu_to_le64(args->atime);
+	data_offset->LastModificationTime = cpu_to_le64(args->mtime);
+	data_offset->Uid = cpu_to_le64(args->uid);
+	data_offset->Gid = cpu_to_le64(args->gid);
+	/* better to leave device as zero when it is  */
+	data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
+	data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
+	data_offset->Permissions = cpu_to_le64(mode);
+
+	if (S_ISREG(mode))
+		data_offset->Type = cpu_to_le32(UNIX_FILE);
+	else if (S_ISDIR(mode))
+		data_offset->Type = cpu_to_le32(UNIX_DIR);
+	else if (S_ISLNK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
+	else if (S_ISCHR(mode))
+		data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
+	else if (S_ISBLK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
+	else if (S_ISFIFO(mode))
+		data_offset->Type = cpu_to_le32(UNIX_FIFO);
+	else if (S_ISSOCK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_SOCKET);
+}
+
 int
 CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
 		       const struct cifs_unix_set_info_args *args,
@@ -5086,7 +5127,6 @@ CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
 	int bytes_returned = 0;
 	FILE_UNIX_BASIC_INFO *data_offset;
 	__u16 params, param_offset, offset, count, byte_count;
-	__u64 mode = args->mode;
 
 	cFYI(1, ("In SetUID/GID/Mode"));
 setPermsRetry:
@@ -5137,38 +5177,8 @@ setPermsRetry:
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
 	pSMB->Reserved4 = 0;
 	pSMB->hdr.smb_buf_length += byte_count;
-	/* Samba server ignores set of file size to zero due to bugs in some
-	older clients, but we should be precise - we use SetFileSize to
-	set file size and do not want to truncate file size to zero
-	accidently as happened on one Samba server beta by putting
-	zero instead of -1 here */
-	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
-	data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
-	data_offset->LastStatusChange = cpu_to_le64(args->ctime);
-	data_offset->LastAccessTime = cpu_to_le64(args->atime);
-	data_offset->LastModificationTime = cpu_to_le64(args->mtime);
-	data_offset->Uid = cpu_to_le64(args->uid);
-	data_offset->Gid = cpu_to_le64(args->gid);
-	/* better to leave device as zero when it is  */
-	data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
-	data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
-	data_offset->Permissions = cpu_to_le64(mode);
-
-	if (S_ISREG(mode))
-		data_offset->Type = cpu_to_le32(UNIX_FILE);
-	else if (S_ISDIR(mode))
-		data_offset->Type = cpu_to_le32(UNIX_DIR);
-	else if (S_ISLNK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
-	else if (S_ISCHR(mode))
-		data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
-	else if (S_ISBLK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
-	else if (S_ISFIFO(mode))
-		data_offset->Type = cpu_to_le32(UNIX_FIFO);
-	else if (S_ISSOCK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_SOCKET);
 
+	cifs_fill_unix_set_info(data_offset, args);
 
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-- 
cgit v1.2.3


From 3bbeeb3c93a961bd01b969dd4395ecac0c09db8d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 20:02:50 -0400
Subject: cifs: add and use CIFSSMBUnixSetFileInfo for setattr calls

cifs: add and use CIFSSMBUnixSetFileInfo for setattr calls

When there's an open filehandle, SET_FILE_INFO is apparently preferred
over SET_PATH_INFO. Add a new variant that sets a FILE_UNIX_INFO_BASIC
infolevel via SET_FILE_INFO and switch cifs_setattr_unix to use the
new call when there's an open filehandle available.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  4 ++++
 fs/cifs/cifssmb.c   | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/inode.c     | 11 +++++++++-
 3 files changed, 77 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d95fd427de57..37c11c08c529 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -220,6 +220,10 @@ struct cifs_unix_set_info_args {
 	dev_t	device;
 };
 
+extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+				  const struct cifs_unix_set_info_args *args,
+				  u16 fid, u32 pid_of_opener);
+
 extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon,
 			char *fileName,
 			const struct cifs_unix_set_info_args *args,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1f3c8a463fcd..922f5fe2084c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5115,6 +5115,69 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
 		data_offset->Type = cpu_to_le32(UNIX_SOCKET);
 }
 
+int
+CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+		       const struct cifs_unix_set_info_args *args,
+		       u16 fid, u32 pid_of_opener)
+{
+	struct smb_com_transaction2_sfi_req *pSMB  = NULL;
+	FILE_UNIX_BASIC_INFO *data_offset;
+	int rc = 0;
+	u16 params, param_offset, offset, byte_count, count;
+
+	cFYI(1, ("Set Unix Info (via SetFileInfo)"));
+	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
+
+	if (rc)
+		return rc;
+
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
+
+	params = 6;
+	pSMB->MaxSetupCount = 0;
+	pSMB->Reserved = 0;
+	pSMB->Flags = 0;
+	pSMB->Timeout = 0;
+	pSMB->Reserved2 = 0;
+	param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
+	offset = param_offset + params;
+
+	data_offset = (FILE_UNIX_BASIC_INFO *)
+				((char *)(&pSMB->hdr.Protocol) + offset);
+	count = sizeof(FILE_UNIX_BASIC_INFO);
+
+	pSMB->MaxParameterCount = cpu_to_le16(2);
+	/* BB find max SMB PDU from sess */
+	pSMB->MaxDataCount = cpu_to_le16(1000);
+	pSMB->SetupCount = 1;
+	pSMB->Reserved3 = 0;
+	pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
+	byte_count = 3 /* pad */  + params + count;
+	pSMB->DataCount = cpu_to_le16(count);
+	pSMB->ParameterCount = cpu_to_le16(params);
+	pSMB->TotalDataCount = pSMB->DataCount;
+	pSMB->TotalParameterCount = pSMB->ParameterCount;
+	pSMB->ParameterOffset = cpu_to_le16(param_offset);
+	pSMB->DataOffset = cpu_to_le16(offset);
+	pSMB->Fid = fid;
+	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
+	pSMB->Reserved4 = 0;
+	pSMB->hdr.smb_buf_length += byte_count;
+	pSMB->ByteCount = cpu_to_le16(byte_count);
+
+	cifs_fill_unix_set_info(data_offset, args);
+
+	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	if (rc)
+		cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
+
+	/* Note: On -EAGAIN error only caller can retry on handle based calls
+		since file handle passed in no longer valid */
+
+	return rc;
+}
+
 int
 CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
 		       const struct cifs_unix_set_info_args *args,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ad19007ea05f..55b616bb381e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1790,6 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsTconInfo *pTcon = cifs_sb->tcon;
 	struct cifs_unix_set_info_args *args = NULL;
+	struct cifsFileInfo *open_file;
 
 	cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
 		 direntry->d_name.name, attrs->ia_valid));
@@ -1876,10 +1877,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		args->ctime = NO_CHANGE_64;
 
 	args->device = 0;
-	rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
+	open_file = find_writable_file(cifsInode);
+	if (open_file) {
+		u16 nfid = open_file->netfid;
+		u32 npid = open_file->pid;
+		rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
+		atomic_dec(&open_file->wrtPending);
+	} else {
+		rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
 				    cifs_sb->local_nls,
 				    cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
+	}
 
 	if (!rc)
 		rc = inode_setattr(inode, attrs);
-- 
cgit v1.2.3


From b77863bfa153e886f9f8faf1a791ba57a36efed0 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 9 Jul 2009 22:51:38 +0000
Subject: [CIFS] update cifs version number

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES  | 6 +++++-
 fs/cifs/cifsfs.h | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 3a9b7a58a51d..92888aa90749 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,11 @@ client generated ones by default (mount option "serverino" turned
 on by default if server supports it).  Add forceuid and forcegid
 mount options (so that when negotiating unix extensions specifying
 which uid mounted does not immediately force the server's reported
-uids to be overridden).  Add support for scope moutn parm.
+uids to be overridden).  Add support for scope mount parm. Improve
+hard link detection to use same inode for both.  Do not set
+read-only dos attribute on directories (for chmod) since Windows
+explorer special cases this attribute bit for directories for
+a different purpose.
 
 Version 1.58
 ------------
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 586df24c9abb..6c170948300d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.59"
+#define CIFS_VERSION   "1.60"
 #endif				/* _CIFSFS_H */
-- 
cgit v1.2.3


From 0b8f18e358384a52c1ed7fa7129c08e7eaf86bb6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 01:46:37 -0400
Subject: cifs: convert cifs_get_inode_info and non-posix readdir to use
 cifs_iget

cifs: convert cifs_get_inode_info and non-posix readdir to use cifs_iget

Rather than allocating an inode and filling it out, have
cifs_get_inode_info fill out a cifs_fattr and call cifs_iget. This means
a pretty hefty reorganization of cifs_get_inode_info.

For the readdir codepath, add a couple of new functions for filling out
cifs_fattr's from different FindFile response infolevels.

Finally, remove cifs_new_inode since there are no more callers.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsacl.c   |  26 ++--
 fs/cifs/cifsglob.h  |   2 +
 fs/cifs/cifsproto.h |   6 +-
 fs/cifs/inode.c     | 397 ++++++++++++++++++++--------------------------------
 fs/cifs/readdir.c   | 350 +++++++++++----------------------------------
 5 files changed, 252 insertions(+), 529 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1403b5d86a73..6941c22398a6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
 
 static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 		       struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
-		       struct inode *inode)
+		       struct cifs_fattr *fattr)
 {
 	int i;
 	int num_aces = 0;
@@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	if (!pdacl) {
 		/* no DACL in the security descriptor, set
 		   all the permissions for user/group/other */
-		inode->i_mode |= S_IRWXUGO;
+		fattr->cf_mode |= S_IRWXUGO;
 		return;
 	}
 
@@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	/* reset rwx permissions for user/group/other.
 	   Also, if num_aces is 0 i.e. DACL has no ACEs,
 	   user/group/other have no permissions */
-	inode->i_mode &= ~(S_IRWXUGO);
+	fattr->cf_mode &= ~(S_IRWXUGO);
 
 	acl_base = (char *)pdacl;
 	acl_size = sizeof(struct cifs_acl);
@@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 			if (compare_sids(&(ppace[i]->sid), pownersid))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &user_mask);
 			if (compare_sids(&(ppace[i]->sid), pgrpsid))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &group_mask);
 			if (compare_sids(&(ppace[i]->sid), &sid_everyone))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &other_mask);
 
 /*			memcpy((void *)(&(cifscred->aces[i])),
@@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 
 /* Convert CIFS ACL to POSIX form */
 static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
-			  struct inode *inode)
+			  struct cifs_fattr *fattr)
 {
 	int rc;
 	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
@@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 	char *end_of_acl = ((char *)pntsd) + acl_len;
 	__u32 dacloffset;
 
-	if ((inode == NULL) || (pntsd == NULL))
+	if (pntsd == NULL)
 		return -EIO;
 
 	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 
 	if (dacloffset)
 		parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
-			   group_sid_ptr, inode);
+			   group_sid_ptr, fattr);
 	else
 		cFYI(1, ("no ACL")); /* BB grant all or default perms? */
 
@@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 	memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
 			sizeof(struct cifs_sid)); */
 
-
 	return 0;
 }
 
@@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
-		     const char *path, const __u16 *pfid)
+void
+cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
+		  struct inode *inode, const char *path, const __u16 *pfid)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	u32 acllen = 0;
@@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
 	if (pntsd)
-		rc = parse_sec_desc(pntsd, acllen, inode);
+		rc = parse_sec_desc(pntsd, acllen, fattr);
 	if (rc)
 		cFYI(1, ("parse sec desc failed rc = %d", rc));
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e6435cba8113..8bcf5a4bcded 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -479,6 +479,8 @@ struct dfs_info3_param {
  */
 
 #define CIFS_FATTR_DFS_REFERRAL		0x1
+#define CIFS_FATTR_DELETE_PENDING	0x2
+#define CIFS_FATTR_NEED_REVAL		0x4
 
 struct cifs_fattr {
 	u32		cf_flags;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 37c11c08c529..da8fbf565991 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -102,7 +102,6 @@ extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
 				     FILE_UNIX_BASIC_INFO *info,
 				     struct cifs_sb_info *cifs_sb);
 extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
-extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
 extern struct inode *cifs_iget(struct super_block *sb,
 			       struct cifs_fattr *fattr);
 
@@ -113,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode,
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, int xid);
-extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
-			    const char *path, const __u16 *pfid);
+extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
+			      struct cifs_fattr *fattr, struct inode *inode,
+			      const char *path, const __u16 *pfid);
 extern int mode_to_acl(struct inode *inode, const char *path, __u64);
 
 extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 55b616bb381e..a807397f444e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -82,23 +82,34 @@ void
 cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
-	unsigned long now = jiffies;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	unsigned long oldtime = cifs_i->time;
 
 	inode->i_atime = fattr->cf_atime;
 	inode->i_mtime = fattr->cf_mtime;
 	inode->i_ctime = fattr->cf_ctime;
-	inode->i_mode = fattr->cf_mode;
 	inode->i_rdev = fattr->cf_rdev;
 	inode->i_nlink = fattr->cf_nlink;
 	inode->i_uid = fattr->cf_uid;
 	inode->i_gid = fattr->cf_gid;
 
+	/* if dynperm is set, don't clobber existing mode */
+	if (inode->i_state & I_NEW ||
+	    !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM))
+		inode->i_mode = fattr->cf_mode;
+
 	cifs_i->cifsAttrs = fattr->cf_cifsattrs;
 	cifs_i->uniqueid = fattr->cf_uniqueid;
 
+	if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
+		cifs_i->time = 0;
+	else
+		cifs_i->time = jiffies;
+
 	cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode,
-		 cifs_i->time, now));
-	cifs_i->time = now;
+		 oldtime, cifs_i->time));
+
+	cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
 
 	/*
 	 * Can't safely change the file size here if the client is writing to
@@ -219,49 +230,6 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 	fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
 }
 
-/**
- * cifs_new inode - create new inode, initialize, and hash it
- * @sb - pointer to superblock
- * @inum - if valid pointer and serverino is enabled, replace i_ino with val
- *
- * Create a new inode, initialize it for CIFS and hash it. Returns the new
- * inode or NULL if one couldn't be allocated.
- *
- * If the share isn't mounted with "serverino" or inum is a NULL pointer then
- * we'll just use the inode number assigned by new_inode(). Note that this can
- * mean i_ino collisions since the i_ino assigned by new_inode is not
- * guaranteed to be unique.
- */
-struct inode *
-cifs_new_inode(struct super_block *sb, __u64 *inum)
-{
-	struct inode *inode;
-
-	inode = new_inode(sb);
-	if (inode == NULL)
-		return NULL;
-
-	/*
-	 * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we
-	 *     stop passing inum as ptr. Are there sanity checks we can use to
-	 *     ensure that the server is really filling in that field? Also,
-	 *     if serverino is disabled, perhaps we should be using iunique()?
-	 */
-	if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
-		inode->i_ino = (unsigned long) *inum;
-
-	/*
-	 * must set this here instead of cifs_alloc_inode since VFS will
-	 * clobber i_flags
-	 */
-	if (sb->s_flags & MS_NOATIME)
-		inode->i_flags |= S_NOATIME | S_NOCMTIME;
-
-	insert_inode_hash(inode);
-
-	return inode;
-}
-
 int cifs_get_inode_info_unix(struct inode **pinode,
 			     const unsigned char *full_path,
 			     struct super_block *sb, int xid)
@@ -302,9 +270,9 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 	return rc;
 }
 
-static int decode_sfu_inode(struct inode *inode, __u64 size,
-			    const unsigned char *path,
-			    struct cifs_sb_info *cifs_sb, int xid)
+static int
+cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
+	      struct cifs_sb_info *cifs_sb, int xid)
 {
 	int rc;
 	int oplock = 0;
@@ -316,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 
 	pbuf = buf;
 
-	if (size == 0) {
-		inode->i_mode |= S_IFIFO;
+	fattr->cf_mode &= ~S_IFMT;
+
+	if (fattr->cf_eof == 0) {
+		fattr->cf_mode |= S_IFIFO;
+		fattr->cf_dtype = DT_FIFO;
 		return 0;
-	} else if (size < 8) {
+	} else if (fattr->cf_eof < 8) {
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		return -EINVAL;	 /* EOPNOTSUPP? */
 	}
 
@@ -331,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 	if (rc == 0) {
 		int buf_type = CIFS_NO_BUFFER;
 			/* Read header */
-		rc = CIFSSMBRead(xid, pTcon,
-				 netfid,
+		rc = CIFSSMBRead(xid, pTcon, netfid,
 				 24 /* length */, 0 /* offset */,
 				 &bytes_read, &pbuf, &buf_type);
 		if ((rc == 0) && (bytes_read >= 8)) {
 			if (memcmp("IntxBLK", pbuf, 8) == 0) {
 				cFYI(1, ("Block device"));
-				inode->i_mode |= S_IFBLK;
+				fattr->cf_mode |= S_IFBLK;
+				fattr->cf_dtype = DT_BLK;
 				if (bytes_read == 24) {
 					/* we have enough to decode dev num */
 					__u64 mjr; /* major */
 					__u64 mnr; /* minor */
 					mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
 					mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
-					inode->i_rdev = MKDEV(mjr, mnr);
+					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxCHR", pbuf, 8) == 0) {
 				cFYI(1, ("Char device"));
-				inode->i_mode |= S_IFCHR;
+				fattr->cf_mode |= S_IFCHR;
+				fattr->cf_dtype = DT_CHR;
 				if (bytes_read == 24) {
 					/* we have enough to decode dev num */
 					__u64 mjr; /* major */
 					__u64 mnr; /* minor */
 					mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
 					mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
-					inode->i_rdev = MKDEV(mjr, mnr);
+					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxLNK", pbuf, 7) == 0) {
 				cFYI(1, ("Symlink"));
-				inode->i_mode |= S_IFLNK;
+				fattr->cf_mode |= S_IFLNK;
+				fattr->cf_dtype = DT_LNK;
 			} else {
-				inode->i_mode |= S_IFREG; /* file? */
+				fattr->cf_mode |= S_IFREG; /* file? */
+				fattr->cf_dtype = DT_REG;
 				rc = -EOPNOTSUPP;
 			}
 		} else {
-			inode->i_mode |= S_IFREG; /* then it is a file */
+			fattr->cf_mode |= S_IFREG; /* then it is a file */
+			fattr->cf_dtype = DT_REG;
 			rc = -EOPNOTSUPP; /* or some unknown SFU type */
 		}
 		CIFSSMBClose(xid, pTcon, netfid);
@@ -376,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 
 #define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID)  /* SETFILEBITS valid bits */
 
-static int get_sfu_mode(struct inode *inode,
-			const unsigned char *path,
-			struct cifs_sb_info *cifs_sb, int xid)
+/*
+ * Fetch mode bits as provided by SFU.
+ *
+ * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ?
+ */
+static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
+			 struct cifs_sb_info *cifs_sb, int xid)
 {
 #ifdef CONFIG_CIFS_XATTR
 	ssize_t rc;
@@ -386,68 +367,80 @@ static int get_sfu_mode(struct inode *inode,
 	__u32 mode;
 
 	rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
-			ea_value, 4 /* size of buf */, cifs_sb->local_nls,
-		cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+			    ea_value, 4 /* size of buf */, cifs_sb->local_nls,
+			    cifs_sb->mnt_cifs_flags &
+				CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc < 0)
 		return (int)rc;
 	else if (rc > 3) {
 		mode = le32_to_cpu(*((__le32 *)ea_value));
-		inode->i_mode &= ~SFBITS_MASK;
-		cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode));
-		inode->i_mode = (mode &  SFBITS_MASK) | inode->i_mode;
+		fattr->cf_mode &= ~SFBITS_MASK;
+		cFYI(1, ("special bits 0%o org mode 0%o", mode,
+			 fattr->cf_mode));
+		fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
 		cFYI(1, ("special mode bits 0%o", mode));
-		return 0;
-	} else {
-		return 0;
 	}
+
+	return 0;
 #else
 	return -EOPNOTSUPP;
 #endif
 }
 
-/*
- *	Needed to setup inode data for the directory which is the
- *	junction to the new submount (ie to setup the fake directory
- *      which represents a DFS referral)
- */
-static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat,
-			       struct super_block *sb)
+/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
+void
+cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
+		       struct cifs_sb_info *cifs_sb, bool adjust_tz)
 {
-	memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO));
-
-/*	__le64 pfnd_dat->AllocationSize = cpu_to_le64(0);
-	__le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
-	__u8 pfnd_dat->DeletePending = 0;
-	__u8 pfnd_data->Directory = 0;
-	__le32 pfnd_dat->EASize = 0;
-	__u64 pfnd_dat->IndexNumber = 0;
-	__u64 pfnd_dat->IndexNumber1 = 0;  */
-	pfnd_dat->CreationTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastAccessTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastWriteTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->ChangeTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY);
-	pfnd_dat->NumberOfLinks = cpu_to_le32(2);
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
+	if (info->DeletePending)
+		fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING;
+
+	if (info->LastAccessTime)
+		fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	else
+		fattr->cf_atime = CURRENT_TIME;
+
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+
+	if (adjust_tz) {
+		fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
+		fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
+	}
+
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
+	fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+
+	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_dtype = DT_DIR;
+	} else {
+		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_dtype = DT_REG;
+	}
+
+	/* clear write bits if ATTR_READONLY is set */
+	if (fattr->cf_cifsattrs & ATTR_READONLY)
+		fattr->cf_mode &= ~(S_IWUGO);
+
+	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
 }
 
 int cifs_get_inode_info(struct inode **pinode,
 	const unsigned char *full_path, FILE_ALL_INFO *pfindData,
 	struct super_block *sb, int xid, const __u16 *pfid)
 {
-	int rc = 0;
-	__u32 attr;
-	struct cifsInodeInfo *cifsInfo;
+	int rc = 0, tmprc;
 	struct cifsTconInfo *pTcon;
-	struct inode *inode;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	char *buf = NULL;
 	bool adjustTZ = false;
-	bool is_dfs_referral = false;
-	umode_t default_mode;
+	struct cifs_fattr fattr;
 
 	pTcon = cifs_sb->tcon;
 	cFYI(1, ("Getting info on %s", full_path));
@@ -482,163 +475,82 @@ int cifs_get_inode_info(struct inode **pinode,
 			adjustTZ = true;
 		}
 	}
-	/* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
-	if (rc == -EREMOTE) {
-		is_dfs_referral = true;
-		fill_fake_finddata(pfindData, sb);
+
+	if (!rc) {
+		cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData,
+				       cifs_sb, adjustTZ);
+	} else if (rc == -EREMOTE) {
+		cifs_create_dfs_fattr(&fattr, sb);
 		rc = 0;
-	} else if (rc)
+	} else {
 		goto cgii_exit;
+	}
 
-	attr = le32_to_cpu(pfindData->Attributes);
-
-	/* get new inode */
+	/*
+	 * If an inode wasn't passed in, then get the inode number
+	 *
+	 * Is an i_ino of zero legal? Can we use that to check if the server
+	 * supports returning inode numbers?  Are there other sanity checks we
+	 * can use to ensure that the server is really filling in that field?
+	 *
+	 * We can not use the IndexNumber field by default from Windows or
+	 * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA
+	 * CIFS spec claims that this value is unique within the scope of a
+	 * share, and the windows docs hint that it's actually unique
+	 * per-machine.
+	 *
+	 * There may be higher info levels that work but are there Windows
+	 * server or network appliances for which IndexNumber field is not
+	 * guaranteed unique?
+	 */
 	if (*pinode == NULL) {
-		__u64 inode_num;
-		__u64 *pinum = &inode_num;
-
-		/* Is an i_ino of zero legal? Can we use that to check
-		   if the server supports returning inode numbers?  Are
-		   there other sanity checks we can use to ensure that
-		   the server is really filling in that field? */
-
-		/* We can not use the IndexNumber field by default from
-		   Windows or Samba (in ALL_INFO buf) but we can request
-		   it explicitly.  It may not be unique presumably if
-		   the server has multiple devices mounted under one share */
-
-		/* There may be higher info levels that work but are
-		   there Windows server or network appliances for which
-		   IndexNumber field is not guaranteed unique? */
-
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 			int rc1 = 0;
 
 			rc1 = CIFSGetSrvInodeNumber(xid, pTcon,
-					full_path, pinum,
+					full_path, &fattr.cf_uniqueid,
 					cifs_sb->local_nls,
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			if (rc1) {
-				cFYI(1, ("GetSrvInodeNum rc %d", rc1));
-				pinum = NULL;
 				/* BB EOPNOSUPP disable SERVER_INUM? */
+				cFYI(1, ("GetSrvInodeNum rc %d", rc1));
+				fattr.cf_uniqueid = iunique(sb, ROOT_I);
 			}
 		} else {
-			pinum = NULL;
+			fattr.cf_uniqueid = iunique(sb, ROOT_I);
 		}
-
-		*pinode = cifs_new_inode(sb, pinum);
-		if (*pinode == NULL) {
-			rc = -ENOMEM;
-			goto cgii_exit;
-		}
-	}
-	inode = *pinode;
-	cifsInfo = CIFS_I(inode);
-	cifsInfo->cifsAttrs = attr;
-	cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
-	cFYI(1, ("Old time %ld", cifsInfo->time));
-	cifsInfo->time = jiffies;
-	cFYI(1, ("New time %ld", cifsInfo->time));
-
-	/* blksize needs to be multiple of two. So safer to default to
-	blksize and blkbits set in superblock so 2**blkbits and blksize
-	will match rather than setting to:
-	(pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
-
-	/* Linux can not store file creation time so ignore it */
-	if (pfindData->LastAccessTime)
-		inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
-	else /* do not need to use current_fs_time - time not stored */
-		inode->i_atime = CURRENT_TIME;
-	inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
-	inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
-	cFYI(DBG2, ("Attributes came in as 0x%x", attr));
-	if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
-		inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
-		inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
-	}
-
-	/* get default inode mode */
-	if (attr & ATTR_DIRECTORY)
-		default_mode = cifs_sb->mnt_dir_mode;
-	else
-		default_mode = cifs_sb->mnt_file_mode;
-
-	/* set permission bits */
-	if (atomic_read(&cifsInfo->inUse) == 0 ||
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-		inode->i_mode = default_mode;
-	else {
-		/* just reenable write bits if !ATTR_READONLY */
-		if ((inode->i_mode & S_IWUGO) == 0 &&
-		    (attr & ATTR_READONLY) == 0)
-			inode->i_mode |= (S_IWUGO & default_mode);
-
-		inode->i_mode &= ~S_IFMT;
-	}
-	/* clear write bits if ATTR_READONLY is set */
-	if (attr & ATTR_READONLY)
-		inode->i_mode &= ~S_IWUGO;
-
-	/* set inode type */
-	if ((attr & ATTR_SYSTEM) &&
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-		/* no need to fix endianness on 0 */
-		if (pfindData->EndOfFile == 0)
-			inode->i_mode |= S_IFIFO;
-		else if (decode_sfu_inode(inode,
-				le64_to_cpu(pfindData->EndOfFile),
-				full_path, cifs_sb, xid))
-			cFYI(1, ("unknown SFU file type\n"));
 	} else {
-		if (attr & ATTR_DIRECTORY)
-			inode->i_mode |= S_IFDIR;
-		else
-			inode->i_mode |= S_IFREG;
+		fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid;
 	}
 
-	cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile);
-	spin_lock(&inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) {
-		/* can not safely shrink the file size here if the
-		   client is writing to it due to potential races */
-		i_size_write(inode, cifsInfo->server_eof);
-
-		/* 512 bytes (2**9) is the fake blocksize that must be
-		   used for this calculation */
-		inode->i_blocks = (512 - 1 + le64_to_cpu(
-				   pfindData->AllocationSize)) >> 9;
+	/* query for SFU type info if supported and needed */
+	if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
+	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+		tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
+		if (tmprc)
+			cFYI(1, ("cifs_sfu_type failed: %d", tmprc));
 	}
-	spin_unlock(&inode->i_lock);
 
-	inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks);
-
-	/* BB fill in uid and gid here? with help from winbind?
-	   or retrieve from NTFS stream extended attribute */
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		cFYI(1, ("Getting mode bits from ACL"));
-		acl_to_uid_mode(cifs_sb, inode, full_path, pfid);
+		cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
 	}
 #endif
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
-		/* fill in remaining high mode bits e.g. SUID, VTX */
-		get_sfu_mode(inode, full_path, cifs_sb, xid);
-	} else if (atomic_read(&cifsInfo->inUse) == 0) {
-		inode->i_uid = cifs_sb->mnt_uid;
-		inode->i_gid = cifs_sb->mnt_gid;
-		/* set so we do not keep refreshing these fields with
-		   bad data after user has changed them in memory */
-		atomic_set(&cifsInfo->inUse, 1);
-	}
-
-	cifs_set_ops(inode, is_dfs_referral);
-
 
+	/* fill in remaining high mode bits e.g. SUID, VTX */
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+		cifs_sfu_mode(&fattr, full_path, cifs_sb, xid);
 
+	if (!*pinode) {
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode)
+			rc = -ENOMEM;
+	} else {
+		cifs_fattr_to_inode(*pinode, &fattr);
+	}
 
 cgii_exit:
 	kfree(buf);
@@ -753,21 +665,14 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 		return ERR_PTR(-ENOMEM);
 
 	xid = GetXid();
-	if (cifs_sb->tcon->unix_ext) {
+	if (cifs_sb->tcon->unix_ext)
 		rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
-		if (!inode)
-			return ERR_PTR(-ENOMEM);
-	} else {
-		inode = iget_locked(sb, ino);
-		if (!inode)
-			return ERR_PTR(-ENOMEM);
-		if (!(inode->i_state & I_NEW))
-			return inode;
-
-		rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb,
+	else
+		rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
 						xid, NULL);
-		unlock_new_inode(inode);
-	}
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
 
 	if (rc && cifs_sb->tcon->ipc) {
 		cFYI(1, ("ipc connection - fake read inode"));
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 231aa6953f83..f823a4a208a7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -112,239 +112,74 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 	return dentry;
 }
 
-/* Returns 1 if new inode created, 2 if both dentry and inode were */
-/* Might check in the future if inode number changed so we can rehash inode */
-static int
-construct_dentry(struct qstr *qstring, struct file *file,
-		 struct inode **ptmp_inode, struct dentry **pnew_dentry,
-		 __u64 *inum)
+static void
+cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
-	struct dentry *tmp_dentry = NULL;
-	struct super_block *sb = file->f_path.dentry->d_sb;
-	int rc = 0;
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
 
-	cFYI(1, ("For %s", qstring->name));
-
-	tmp_dentry = d_lookup(file->f_path.dentry, qstring);
-	if (tmp_dentry) {
-		/* BB: overwrite old name? i.e. tmp_dentry->d_name and
-		 * tmp_dentry->d_name.len??
-		 */
-		cFYI(0, ("existing dentry with inode 0x%p",
-			 tmp_dentry->d_inode));
-		*ptmp_inode = tmp_dentry->d_inode;
-		if (*ptmp_inode == NULL) {
-			*ptmp_inode = cifs_new_inode(sb, inum);
-			if (*ptmp_inode == NULL)
-				return rc;
-			rc = 1;
-		}
+	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_dtype = DT_DIR;
 	} else {
-		tmp_dentry = d_alloc(file->f_path.dentry, qstring);
-		if (tmp_dentry == NULL) {
-			cERROR(1, ("Failed allocating dentry"));
-			*ptmp_inode = NULL;
-			return rc;
-		}
-
-		if (CIFS_SB(sb)->tcon->nocase)
-			tmp_dentry->d_op = &cifs_ci_dentry_ops;
-		else
-			tmp_dentry->d_op = &cifs_dentry_ops;
-
-		*ptmp_inode = cifs_new_inode(sb, inum);
-		if (*ptmp_inode == NULL)
-			return rc;
-		rc = 2;
-	}
-
-	tmp_dentry->d_time = jiffies;
-	*pnew_dentry = tmp_dentry;
-	return rc;
-}
-
-static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
-			  char *buf, unsigned int *pobject_type, int isNewInode)
-{
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-	__u32 attr;
-	__u64 allocation_size;
-	__u64 end_of_file;
-	umode_t default_mode;
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	if (new_buf_type) {
-		FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
-
-		attr = le32_to_cpu(pfindData->ExtFileAttributes);
-		allocation_size = le64_to_cpu(pfindData->AllocationSize);
-		end_of_file = le64_to_cpu(pfindData->EndOfFile);
-		tmp_inode->i_atime =
-			cifs_NTtimeToUnix(pfindData->LastAccessTime);
-		tmp_inode->i_mtime =
-			cifs_NTtimeToUnix(pfindData->LastWriteTime);
-		tmp_inode->i_ctime =
-			cifs_NTtimeToUnix(pfindData->ChangeTime);
-	} else { /* legacy, OS2 and DOS style */
-		int offset = cifs_sb->tcon->ses->server->timeAdj;
-		FIND_FILE_STANDARD_INFO *pfindData =
-			(FIND_FILE_STANDARD_INFO *)buf;
-
-		tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
-						    pfindData->LastWriteTime,
-						    offset);
-		tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
-						    pfindData->LastAccessTime,
-						    offset);
-		tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
-						    pfindData->LastWriteTime,
-						    offset);
-		attr = le16_to_cpu(pfindData->Attributes);
-		allocation_size = le32_to_cpu(pfindData->AllocationSize);
-		end_of_file = le32_to_cpu(pfindData->DataSize);
-	}
-
-	/* Linux can not store file creation time unfortunately so ignore it */
-
-	cifsInfo->cifsAttrs = attr;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
-		/* get more accurate mode via ACL - so force inode refresh */
-		cifsInfo->time = 0;
-	} else
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
-		cifsInfo->time = jiffies;
-
-	/* treat dos attribute of read-only as read-only mode bit e.g. 555? */
-	/* 2767 perms - indicate mandatory locking */
-		/* BB fill in uid and gid here? with help from winbind?
-		   or retrieve from NTFS stream extended attribute */
-	if (atomic_read(&cifsInfo->inUse) == 0) {
-		tmp_inode->i_uid = cifs_sb->mnt_uid;
-		tmp_inode->i_gid = cifs_sb->mnt_gid;
+		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_dtype = DT_REG;
 	}
 
-	if (attr & ATTR_DIRECTORY)
-		default_mode = cifs_sb->mnt_dir_mode;
-	else
-		default_mode = cifs_sb->mnt_file_mode;
-
-	/* set initial permissions */
-	if ((atomic_read(&cifsInfo->inUse) == 0) ||
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-		tmp_inode->i_mode = default_mode;
-	else {
-		/* just reenable write bits if !ATTR_READONLY */
-		if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
-		    (attr & ATTR_READONLY) == 0)
-			tmp_inode->i_mode |= (S_IWUGO & default_mode);
-
-		tmp_inode->i_mode &= ~S_IFMT;
-	}
-
-	/* clear write bits if ATTR_READONLY is set */
-	if (attr & ATTR_READONLY)
-		tmp_inode->i_mode &= ~S_IWUGO;
+	if (fattr->cf_cifsattrs & ATTR_READONLY)
+		fattr->cf_mode &= ~S_IWUGO;
 
-	/* set inode type */
-	if ((attr & ATTR_SYSTEM) &&
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-		if (end_of_file == 0)  {
-			tmp_inode->i_mode |= S_IFIFO;
-			*pobject_type = DT_FIFO;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
+	    fattr->cf_cifsattrs & ATTR_SYSTEM) {
+		if (fattr->cf_eof == 0)  {
+			fattr->cf_mode &= ~S_IFMT;
+			fattr->cf_mode |= S_IFIFO;
+			fattr->cf_dtype = DT_FIFO;
 		} else {
 			/*
-			 * trying to get the type can be slow, so just call
-			 * this a regular file for now, and mark for reval
+			 * trying to get the type and mode via SFU can be slow,
+			 * so just call those regular files for now, and mark
+			 * for reval
 			 */
-			tmp_inode->i_mode |= S_IFREG;
-			*pobject_type = DT_REG;
-			cifsInfo->time = 0;
-		}
-	} else {
-		if (attr & ATTR_DIRECTORY) {
-			tmp_inode->i_mode |= S_IFDIR;
-			*pobject_type = DT_DIR;
-		} else {
-			tmp_inode->i_mode |= S_IFREG;
-			*pobject_type = DT_REG;
+			fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
 		}
 	}
+}
 
-	/* can not fill in nlink here as in qpathinfo version and Unx search */
-	if (atomic_read(&cifsInfo->inUse) == 0)
-		atomic_set(&cifsInfo->inUse, 1);
+void
+cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
+		       struct cifs_sb_info *cifs_sb)
+{
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
+	fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+
+	cifs_fill_common_info(fattr, cifs_sb);
+}
 
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&tmp_inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		client is writing to it due to potential races */
-		i_size_write(tmp_inode, end_of_file);
+void
+cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info,
+		       struct cifs_sb_info *cifs_sb)
+{
+	int offset = cifs_sb->tcon->ses->server->timeAdj;
 
-	/* 512 bytes (2**9) is the fake blocksize that must be used */
-	/* for this calculation, even though the reported blocksize is larger */
-		tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9;
-	}
-	spin_unlock(&tmp_inode->i_lock);
-
-	if (allocation_size < end_of_file)
-		cFYI(1, ("May be sparse file, allocation less than file size"));
-	cFYI(1, ("File Size %ld and blocks %llu",
-		(unsigned long)tmp_inode->i_size,
-		(unsigned long long)tmp_inode->i_blocks));
-	if (S_ISREG(tmp_inode->i_mode)) {
-		cFYI(1, ("File inode"));
-		tmp_inode->i_op = &cifs_file_inode_ops;
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
-			else
-				tmp_inode->i_fop = &cifs_file_direct_ops;
-		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			tmp_inode->i_fop = &cifs_file_nobrl_ops;
-		else
-			tmp_inode->i_fop = &cifs_file_ops;
-
-		if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
-		   (cifs_sb->tcon->ses->server->maxBuf <
-			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-		else
-			tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
-		if (isNewInode)
-			return; /* No sense invalidating pages for new inode
-				   since have not started caching readahead file
-				   data yet */
-
-		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-			(local_size == tmp_inode->i_size)) {
-			cFYI(1, ("inode exists but unchanged"));
-		} else {
-			/* file may have changed on server */
-			cFYI(1, ("invalidate inode, readdir detected change"));
-			invalidate_remote_inode(tmp_inode);
-		}
-	} else if (S_ISDIR(tmp_inode->i_mode)) {
-		cFYI(1, ("Directory inode"));
-		tmp_inode->i_op = &cifs_dir_inode_ops;
-		tmp_inode->i_fop = &cifs_dir_ops;
-	} else if (S_ISLNK(tmp_inode->i_mode)) {
-		cFYI(1, ("Symbolic Link inode"));
-		tmp_inode->i_op = &cifs_symlink_inode_ops;
-	} else {
-		cFYI(1, ("Init special inode"));
-		init_special_inode(tmp_inode, tmp_inode->i_mode,
-				   tmp_inode->i_rdev);
-	}
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate,
+					    info->LastAccessTime, offset);
+	fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate,
+					    info->LastWriteTime, offset);
+	fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate,
+					    info->LastWriteTime, offset);
+
+	fattr->cf_cifsattrs = le16_to_cpu(info->Attributes);
+	fattr->cf_bytes = le32_to_cpu(info->AllocationSize);
+	fattr->cf_eof = le32_to_cpu(info->DataSize);
+
+	cifs_fill_common_info(fattr, cifs_sb);
 }
 
 /* BB eventually need to add the following helper function to
@@ -846,11 +681,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	int rc = 0;
 	struct qstr qstring;
 	struct cifsFileInfo *pCifsF;
-	unsigned int obj_type;
-	__u64  inum;
+	u64    inum;
 	ino_t  ino;
+	struct super_block *sb;
 	struct cifs_sb_info *cifs_sb;
-	struct inode *tmp_inode;
 	struct dentry *tmp_dentry;
 	struct cifs_fattr fattr;
 
@@ -870,71 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	if (rc != 0)
 		return 0;
 
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	sb = file->f_path.dentry->d_sb;
+	cifs_sb = CIFS_SB(sb);
 
 	qstring.name = scratch_buf;
 	rc = cifs_get_name_from_search_buf(&qstring, pfindEntry,
 			pCifsF->srch_inf.info_level,
 			pCifsF->srch_inf.unicode, cifs_sb,
-			max_len,
-			&inum /* returned */);
+			max_len, &inum /* returned */);
 
 	if (rc)
 		return rc;
 
-	/* only these two infolevels return valid inode numbers */
-	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
+	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
 		cifs_unix_basic_to_fattr(&fattr,
 				 &((FILE_UNIX_INFO *) pfindEntry)->basic,
 				 cifs_sb);
-		tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring,
-						 &fattr);
-		obj_type = fattr.cf_dtype;
-		ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
-	} else {
-		if (pCifsF->srch_inf.info_level ==
-		    SMB_FIND_FILE_ID_FULL_DIR_INFO)
-			rc = construct_dentry(&qstring, file, &tmp_inode,
-						&tmp_dentry, &inum);
-		else
-			rc = construct_dentry(&qstring, file, &tmp_inode,
-						&tmp_dentry, NULL);
-
-		if ((tmp_inode == NULL) || (tmp_dentry == NULL)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		/* we pass in rc below, indicating whether it is a new inode,
-		 * so we can figure out whether to invalidate the inode cached
-		 * data if the file has changed
-		 */
-		if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
-			fill_in_inode(tmp_inode, 0, pfindEntry, &obj_type, rc);
-		else
-			fill_in_inode(tmp_inode, 1, pfindEntry, &obj_type, rc);
+	else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
+		cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *)
+					pfindEntry, cifs_sb);
+	else
+		cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)
+					pfindEntry, cifs_sb);
 
-		/* new inode - needs to be tied to dentry */
-		if (rc) {
-			d_instantiate(tmp_dentry, tmp_inode);
-			if (rc == 2)
-				d_rehash(tmp_dentry);
-		}
+	/* FIXME: make _to_fattr functions fill this out */
+	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
+		fattr.cf_uniqueid = inum;
+	else
+		fattr.cf_uniqueid = iunique(sb, ROOT_I);
 
-		ino = cifs_uniqueid_to_ino_t(tmp_inode->i_ino);
-	}
+	ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
+	tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr);
 
 	rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
-		     ino, obj_type);
+		     ino, fattr.cf_dtype);
+
+	/*
+	 * we can not return filldir errors to the caller since they are
+	 * "normal" when the stat blocksize is too small - we return remapped
+	 * error instead
+	 *
+	 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
+	 * case already. Why should we be clobbering other errors from it?
+	 */
 	if (rc) {
 		cFYI(1, ("filldir rc = %d", rc));
-		/* we can not return filldir errors to the caller
-		since they are "normal" when the stat blocksize
-		is too small - we return remapped error instead */
 		rc = -EOVERFLOW;
 	}
-
-out:
 	dput(tmp_dentry);
 	return rc;
 }
-- 
cgit v1.2.3


From aeaaf253c4dee7ff9af2f3f0595f3bb66964e944 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 01:46:39 -0400
Subject: cifs: remove cifsInodeInfo->inUse counter

cifs: remove cifsInodeInfo->inUse counter

It was purported to be a refcounter of some sort, but was never
used that way. It never served any purpose that wasn't served equally well
by the I_NEW flag.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   | 1 -
 fs/cifs/cifsglob.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9f669f982c4d..44f30504b82d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb)
 	if (!cifs_inode)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
-	atomic_set(&cifs_inode->inUse, 0);
 	cifs_inode->time = 0;
 	cifs_inode->write_behind_rc = 0;
 	/* Until the file is open and we have gotten oplock
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8bcf5a4bcded..63f6cdfa5638 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -364,7 +364,6 @@ struct cifsInodeInfo {
 	struct list_head openFileList;
 	int write_behind_rc;
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
-	atomic_t inUse;	 /* num concurrent users (local openers cifs) of file*/
 	unsigned long time;	/* jiffies of last update/check of inode */
 	bool clientCanCacheRead:1;	/* read oplock */
 	bool clientCanCacheAll:1;	/* read and writebehind oplock */
-- 
cgit v1.2.3


From d0c280d26de9422c9c943f8f486b9830cd9bea70 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 9 Jul 2009 01:46:44 -0400
Subject: cifs: when ATTR_READONLY is set, only clear write bits on
 non-directories

cifs: when ATTR_READONLY is set, only clear write bits on non-directories

On windows servers, ATTR_READONLY apparently either has no meaning or
serves as some sort of queue to certain applications for unrelated
behavior. This MS kbase article has details:

http://support.microsoft.com/kb/326549/

Don't clear the write bits directory mode when ATTR_READONLY is set.

Reported-by: pouchat@peewiki.net
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a807397f444e..18afe57b2461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -419,11 +419,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 	} else {
 		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
 		fattr->cf_dtype = DT_REG;
-	}
 
-	/* clear write bits if ATTR_READONLY is set */
-	if (fattr->cf_cifsattrs & ATTR_READONLY)
-		fattr->cf_mode &= ~(S_IWUGO);
+		/* clear write bits if ATTR_READONLY is set */
+		if (fattr->cf_cifsattrs & ATTR_READONLY)
+			fattr->cf_mode &= ~(S_IWUGO);
+	}
 
 	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
 
-- 
cgit v1.2.3


From 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 9 Jul 2009 14:52:32 +0200
Subject: Fix congestion_wait() sync/async vs read/write confusion

Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fat/file.c              | 2 +-
 fs/fuse/dev.c              | 8 ++++----
 fs/nfs/write.c             | 8 +++++---
 fs/reiserfs/journal.c      | 2 +-
 fs/xfs/linux-2.6/kmem.c    | 4 ++--
 fs/xfs/linux-2.6/xfs_buf.c | 2 +-
 6 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index b28ea646ff60..f042b965c95c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	if ((filp->f_mode & FMODE_WRITE) &&
 	     MSDOS_SB(inode->i_sb)->options.flush) {
 		fat_flush_inodes(inode->i_sb, inode, NULL);
-		congestion_wait(WRITE, HZ/10);
+		congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 	return 0;
 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f58ecbc416c8..6484eb75acd6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -286,8 +286,8 @@ __releases(&fc->lock)
 		}
 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 		    fc->connected && fc->bdi_initialized) {
-			clear_bdi_congested(&fc->bdi, READ);
-			clear_bdi_congested(&fc->bdi, WRITE);
+			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 		}
 		fc->num_background--;
 		fc->active_background--;
@@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 		fc->blocked = 1;
 	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 	    fc->bdi_initialized) {
-		set_bdi_congested(&fc->bdi, READ);
-		set_bdi_congested(&fc->bdi, WRITE);
+		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 	}
 	list_add_tail(&req->list, &fc->bg_queue);
 	flush_bg_queue(fc);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce728829f79a..0a0a2ff767c3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
 		if (atomic_long_inc_return(&nfss->writeback) >
-				NFS_CONGESTION_ON_THRESH)
-			set_bdi_congested(&nfss->backing_dev_info, WRITE);
+				NFS_CONGESTION_ON_THRESH) {
+			set_bdi_congested(&nfss->backing_dev_info,
+						BLK_RW_ASYNC);
+		}
 	}
 	return ret;
 }
@@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
 
 	end_page_writeback(page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
+		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
 
 /*
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 77f5bb746bf0..90622200b39c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
 	DEFINE_WAIT(wait);
 	struct reiserfs_journal *j = SB_JOURNAL(s);
 	if (atomic_read(&j->j_async_throttle))
-		congestion_wait(WRITE, HZ / 10);
+		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 1cd3b55ee3d2..2d3f90afe5f1 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__func__, lflags);
-		congestion_wait(WRITE, HZ/50);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
 
@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__func__, lflags);
-		congestion_wait(WRITE, HZ/50);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 1418b916fc27..0c93c7ef3d18 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
 
 			XFS_STATS_INC(xb_page_retries);
 			xfsbufd_wakeup(0, gfp_mask);
-			congestion_wait(WRITE, HZ/50);
+			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
 
-- 
cgit v1.2.3


From ecb554a846f8e9d2a58f6d6c118168a63ac065aa Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 9 Jul 2009 14:46:53 +0200
Subject: block: fix sg SG_DXFER_TO_FROM_DEV regression

I overlooked SG_DXFER_TO_FROM_DEV support when I converted sg to use
the block layer mapping API (2.6.28).

Douglas Gilbert explained SG_DXFER_TO_FROM_DEV:

http://www.spinics.net/lists/linux-scsi/msg37135.html

=
The semantics of SG_DXFER_TO_FROM_DEV were:
   - copy user space buffer to kernel (LLD) buffer
   - do SCSI command which is assumed to be of the DATA_IN
     (data from device) variety. This would overwrite
     some or all of the kernel buffer
   - copy kernel (LLD) buffer back to the user space.

The idea was to detect short reads by filling the original
user space buffer with some marker bytes ("0xec" it would
seem in this report). The "resid" value is a better way
of detecting short reads but that was only added this century
and requires co-operation from the LLD.
=

This patch changes the block layer mapping API to support this
semantics. This simply adds another field to struct rq_map_data and
enables __bio_copy_iov() to copy data from user space even with READ
requests.

It's better to add the flags field and kills null_mapped and the new
from_user fields in struct rq_map_data but that approach makes it
difficult to send this patch to stable trees because st and osst
drivers use struct rq_map_data (they were converted to use the block
layer in 2.6.29 and 2.6.30). Well, I should clean up the block layer
mapping API.

zhou sf reported this regiression and tested this patch:

http://www.spinics.net/lists/linux-scsi/msg37128.html
http://www.spinics.net/lists/linux-scsi/msg37168.html

Reported-by: zhou sf <sxzzsf@gmail.com>
Tested-by: zhou sf <sxzzsf@gmail.com>
Cc: stable@kernel.org
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 1486b19fc431..76738005c8e8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
 }
 
 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
-			  struct sg_iovec *iov, int iov_count, int uncopy,
-			  int do_free_page)
+			  struct sg_iovec *iov, int iov_count,
+			  int to_user, int from_user, int do_free_page)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
 	int iov_idx = 0;
 	unsigned int iov_off = 0;
-	int read = bio_data_dir(bio) == READ;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
@@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
 			iov_addr = iov[iov_idx].iov_base + iov_off;
 
 			if (!ret) {
-				if (!read && !uncopy)
-					ret = copy_from_user(bv_addr, iov_addr,
-							     bytes);
-				if (read && uncopy)
+				if (to_user)
 					ret = copy_to_user(iov_addr, bv_addr,
 							   bytes);
 
+				if (from_user)
+					ret = copy_from_user(bv_addr, iov_addr,
+							     bytes);
+
 				if (ret)
 					ret = -EFAULT;
 			}
@@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio)
 
 	if (!bio_flagged(bio, BIO_NULL_MAPPED))
 		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
-				     bmd->nr_sgvecs, 1, bmd->is_our_pages);
+				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+				     0, bmd->is_our_pages);
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
@@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	/*
 	 * success
 	 */
-	if (!write_to_vm && (!map_data || !map_data->null_mapped)) {
-		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
+	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
+	    (map_data && map_data->from_user)) {
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
 		if (ret)
 			goto cleanup;
 	}
-- 
cgit v1.2.3


From 097041e576ee3a50d92dd643ee8ca65bf6a62e21 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Fri, 10 Jul 2009 20:06:42 -0500
Subject: fuse: Fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building v2.6.31-rc2-344-g69ca06c, the following build errors are
found due to missing includes:

 CC [M]  fs/fuse/dev.o
fs/fuse/dev.c: In function ‘request_end’:
fs/fuse/dev.c:289: error: ‘BLK_RW_SYNC’ undeclared (first use in this function)
...
fs/nfs/write.c: In function ‘nfs_set_page_writeback’:
fs/nfs/write.c:207: error: ‘BLK_RW_ASYNC’ undeclared (first use in this function)

Signed-off-by: Larry Finger@lwfinger.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dev.c  | 1 +
 fs/nfs/write.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6484eb75acd6..cbceacbc0bf9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0a0a2ff767c3..35d81316163f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -19,6 +19,7 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs_page.h>
 #include <linux/backing-dev.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 
-- 
cgit v1.2.3


From 8711c67bee675b4f7a378c71ad5a59c981ec3df0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 10 Jul 2009 12:34:27 +0200
Subject: isofs: fix Joliet regression

commit 5404ac8e4418ab3d254950ee4f9bcafc1da20b4a ("isofs: cleanup mount
option processing") missed conversion of joliet option flag resulting
in non-working Joliet support.

CC: walt <w41ter@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/isofs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 58a7963e168a..85f96bc651c7 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = {
 
 struct iso9660_options{
 	unsigned int rock:1;
+	unsigned int joliet:1;
 	unsigned int cruft:1;
 	unsigned int hide:1;
 	unsigned int showassoc:1;
@@ -151,7 +152,6 @@ struct iso9660_options{
 	unsigned int gid_set:1;
 	unsigned int utf8:1;
 	unsigned char map;
-	char joliet;
 	unsigned char check;
 	unsigned int blocksize;
 	mode_t fmode;
@@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 			else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) {
 				sec = (struct iso_supplementary_descriptor *)vdp;
 				if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
-					if (opt.joliet == 'y') {
+					if (opt.joliet) {
 						if (sec->escape[2] == 0x40)
 							joliet_level = 1;
 						else if (sec->escape[2] == 0x43)
-- 
cgit v1.2.3


From 81e4e1ba7ed4a1fdcf0e2ee944f1575010471464 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 11 Jul 2009 11:22:34 -0700
Subject: Revert "fuse: Fix build error" as unnecessary

This reverts commit 097041e576ee3a50d92dd643ee8ca65bf6a62e21.

Trond had a better fix, which is the parent of this one ("Fix compile
error due to congestion_wait() changes")

Requested-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dev.c  | 1 -
 fs/nfs/write.c | 1 -
 2 files changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cbceacbc0bf9..6484eb75acd6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -16,7 +16,6 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/slab.h>
-#include <linux/blkdev.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 35d81316163f..0a0a2ff767c3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -19,7 +19,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs_page.h>
 #include <linux/backing-dev.h>
-#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 
-- 
cgit v1.2.3


From 405f55712dfe464b3240d7816cc4fe4174831be2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 11 Jul 2009 22:08:37 +0400
Subject: headers: smp_lock.h redux

* Remove smp_lock.h from files which don't need it (including some headers!)
* Add smp_lock.h to files which do need it
* Make smp_lock.h include conditional in hardirq.h
  It's needed only for one kernel_locked() usage which is under CONFIG_PREEMPT

  This will make hardirq.h inclusion cheaper for every PREEMPT=n config
  (which includes allmodconfig/allyesconfig, BTW)

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/adfs/super.c             | 1 +
 fs/afs/super.c              | 1 +
 fs/autofs4/dev-ioctl.c      | 1 -
 fs/bfs/dir.c                | 1 -
 fs/bfs/file.c               | 1 -
 fs/btrfs/compression.c      | 1 -
 fs/btrfs/file.c             | 1 -
 fs/btrfs/inode.c            | 1 -
 fs/btrfs/ioctl.c            | 1 -
 fs/btrfs/super.c            | 1 -
 fs/char_dev.c               | 1 -
 fs/compat.c                 | 1 -
 fs/compat_ioctl.c           | 1 +
 fs/exofs/super.c            | 1 +
 fs/ext2/ioctl.c             | 1 -
 fs/ext4/ioctl.c             | 1 -
 fs/fat/dir.c                | 1 -
 fs/fat/namei_msdos.c        | 1 -
 fs/fat/namei_vfat.c         | 1 -
 fs/fcntl.c                  | 1 -
 fs/freevxfs/vxfs_super.c    | 1 +
 fs/hfs/super.c              | 1 +
 fs/hfsplus/super.c          | 1 +
 fs/hpfs/dir.c               | 1 +
 fs/hpfs/file.c              | 1 +
 fs/hpfs/hpfs_fn.h           | 1 -
 fs/hpfs/inode.c             | 1 +
 fs/hpfs/namei.c             | 1 +
 fs/jffs2/super.c            | 1 +
 fs/lockd/clntproc.c         | 1 +
 fs/lockd/svc4proc.c         | 1 +
 fs/lockd/svcproc.c          | 1 +
 fs/nfs/delegation.c         | 1 +
 fs/nfs/dir.c                | 1 -
 fs/nfs/file.c               | 1 -
 fs/nfs/inode.c              | 1 -
 fs/nfs/nfs4proc.c           | 1 -
 fs/nfs/read.c               | 1 -
 fs/nfsd/nfsctl.c            | 1 -
 fs/nfsd/nfssvc.c            | 1 -
 fs/nilfs2/dir.c             | 1 -
 fs/ocfs2/ioctl.c            | 1 -
 fs/reiserfs/xattr.c         | 1 -
 fs/squashfs/super.c         | 1 +
 fs/ubifs/ioctl.c            | 1 -
 fs/xfs/linux-2.6/xfs_file.c | 1 -
 46 files changed, 17 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index aad92f0a1048..6910a98bd73c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -13,6 +13,7 @@
 #include <linux/parser.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/statfs.h>
 #include "adfs.h"
 #include "dir_f.h"
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ad0514d0115f..e1ea1c240b6a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/parser.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index f3da2eb51f56..00bf8fcb245f 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -19,7 +19,6 @@
 #include <linux/sched.h>
 #include <linux/compat.h>
 #include <linux/syscalls.h>
-#include <linux/smp_lock.h>
 #include <linux/magic.h>
 #include <linux/dcache.h>
 #include <linux/uaccess.h>
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 54bd07d44e68..1e41aadb1068 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -8,7 +8,6 @@
 #include <linux/time.h>
 #include <linux/string.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/sched.h>
 #include "bfs.h"
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 6a021265f018..88b9a3ff44e4 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -11,7 +11,6 @@
 
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
 #include "bfs.h"
 
 #undef DEBUG
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index de1e2fd32080..9d8ba4d54a37 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -26,7 +26,6 @@
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
 #include <linux/swap.h>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7c3cd248d8d6..4b833972273a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,7 +22,6 @@
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
 #include <linux/swap.h>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7ffa3d34ea19..791eab19e330 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -26,7 +26,6 @@
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
 #include <linux/swap.h>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9f4db848db10..bd88f25889f7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -27,7 +27,6 @@
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
 #include <linux/mpage.h>
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9f179d4832d5..6d6d06cb6dfc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -26,7 +26,6 @@
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
 #include <linux/mpage.h>
diff --git a/fs/char_dev.c b/fs/char_dev.c
index b7c9d5187a75..a173551e19d7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -13,7 +13,6 @@
 #include <linux/major.h>
 #include <linux/errno.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 
 #include <linux/kobject.h>
diff --git a/fs/compat.c b/fs/compat.c
index fbadb947727b..94502dab972a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -32,7 +32,6 @@
 #include <linux/smb_mount.h>
 #include <linux/ncp_mount.h>
 #include <linux/nfs4_mount.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
 #include <linux/module.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 626c7483b4de..f28f070a60fc 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,6 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/smp_lock.h>
 #include <linux/ioctl.h>
 #include <linux/if.h>
 #include <linux/if_bridge.h>
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index a343b4ea62f6..5ab10c3bbebe 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -31,6 +31,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/parser.h>
 #include <linux/vfs.h>
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 7cb4badef927..e7431309bdca 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/compat.h>
 #include <linux/mount.h>
-#include <linux/smp_lock.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bb415408fdb6..24a6abb2aef5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -12,7 +12,6 @@
 #include <linux/capability.h>
 #include <linux/time.h>
 #include <linux/compat.h>
-#include <linux/smp_lock.h>
 #include <linux/mount.h>
 #include <linux/file.h>
 #include <asm/uaccess.h>
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 38ff75a0fe22..530b4ca01510 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/compat.h>
 #include <asm/uaccess.h>
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 82f88733b681..bbc94ae4fd77 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
 #include "fat.h"
 
 /* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 73471b7ecc8c..cb6e83557112 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -19,7 +19,6 @@
 #include <linux/jiffies.h>
 #include <linux/ctype.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/namei.h>
 #include "fat.h"
diff --git a/fs/fcntl.c b/fs/fcntl.c
index a040b764f8e3..ae413086db97 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,7 +19,6 @@
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
-#include <linux/smp_lock.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index cdbd1654e4cd..1e8af939b3e4 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -38,6 +38,7 @@
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/stat.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6f833dc8e910..f7fcbe49da72 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -19,6 +19,7 @@
 #include <linux/nls.h>
 #include <linux/parser.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 
 #include "hfs_fs.h"
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9fc3af0c0dab..c0759fe0855b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 #include <linux/nls.h>
 
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 6916c41d7017..8865c94f55f6 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -6,6 +6,7 @@
  *  directory VFS functions
  */
 
+#include <linux/smp_lock.h>
 #include "hpfs_fn.h"
 
 static int hpfs_dir_release(struct inode *inode, struct file *filp)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 64ab52259204..3efabff00367 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -6,6 +6,7 @@
  *  file VFS functions
  */
 
+#include <linux/smp_lock.h>
 #include "hpfs_fn.h"
 
 #define BLOCKS(size) (((size) + 511) >> 9)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c2ea31bae313..701ca54c0867 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,7 +13,6 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 
 #include "hpfs.h"
 
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 39a1bfbea312..fe703ae46bc7 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -6,6 +6,7 @@
  *  inode VFS functions
  */
 
+#include <linux/smp_lock.h>
 #include "hpfs_fn.h"
 
 void hpfs_init_inode(struct inode *i)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index b649232dde97..82b9c4ba9ed0 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -6,6 +6,7 @@
  *  adding & removing files & directories
  */
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include "hpfs_fn.h"
 
 static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 07a22caf2687..0035c021395a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/fs.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index f2fdcbce143e..4336adba952a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1725037374c5..bd173a6ca3b1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -10,6 +10,7 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/in.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3688e55901fc..e1d28ddd2169 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -10,6 +10,7 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/in.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af05b918cb5b..6dd48a4405b4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 
 #include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 89f98e9a024b..38d42c29fb92 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -29,7 +29,6 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/pagevec.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0055b813ec2c..05062329b678 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/aio.h>
 
 #include <asm/uaccess.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 64f87194d390..bd7938eda6a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -30,7 +30,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/lockd/bind.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/nfs_idmap.h>
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 92ce43517814..ff0c080db59b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -45,7 +45,6 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-#include <linux/smp_lock.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 96c4ebfa46f4..73ea5e8d66ce 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,7 +18,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1250fb978ac1..6d0847562d87 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/inet.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/ctype.h>
 
 #include <linux/nfs.h>
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d4c9884cd54b..492c79b7800b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -18,7 +18,6 @@
 #include <linux/unistd.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/freezer.h>
 #include <linux/fs_struct.h>
 #include <linux/kthread.h>
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 54100acc1102..1a4fa04cf071 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -43,7 +43,6 @@
  */
 
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include "nilfs.h"
 #include "page.h"
 
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 9fcd36dcc9a0..467b413bec21 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,7 +7,6 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
-#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index f3d47d856848..6925b835a43b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -46,7 +46,6 @@
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
 #include <net/checksum.h>
-#include <linux/smp_lock.h>
 #include <linux/stat.h>
 #include <linux/quotaops.h>
 
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3b52770f46ff..cb5fc57e370b 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -30,6 +30,7 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 6db7a6be6c97..8aacd64957a2 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -25,7 +25,6 @@
 /* This file implements EXT2-compatible extended attribute ioctl() calls */
 
 #include <linux/compat.h>
-#include <linux/smp_lock.h>
 #include <linux/mount.h>
 #include "ubifs.h"
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4e255441574..0542fd507649 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,7 +41,6 @@
 #include "xfs_ioctl.h"
 
 #include <linux/dcache.h>
-#include <linux/smp_lock.h>
 
 static struct vm_operations_struct xfs_file_vm_ops;
 
-- 
cgit v1.2.3


From dd0d9a46f573b086a67522f819566427dba9c4c7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 9 Jul 2009 10:44:30 +0100
Subject: AFS: Fix compilation warning

Fix the following warning:

  fs/afs/dir.c: In function 'afs_d_revalidate':
  fs/afs/dir.c:567: warning: 'fid.vnode' may be used uninitialized in this function
  fs/afs/dir.c:567: warning: 'fid.unique' may be used uninitialized in this function

by marking the 'fid' variable as an uninitialized_var.  The problem is
that gcc doesn't always manage to work out that fid is always set on the
path through the function that uses it.

Cc: linux-afs@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9bd757774c9e..88067f36e5e7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct afs_vnode *vnode, *dir;
-	struct afs_fid fid;
+	struct afs_fid uninitialized_var(fid);
 	struct dentry *parent;
 	struct key *key;
 	void *dir_version;
-- 
cgit v1.2.3


From f8c73c790c588fd70fda1632c8927a87b3d31dcd Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 11 Jun 2009 15:14:40 +0200
Subject: partitions: fix broken uevent_suppress conversion

git commit f67f129e "Driver core: implement uevent suppress in kobject"
contains this chunk for fs/partitions/check.c:

 	/* suppress uevent if the disk supresses it */
-	if (!ddev->uevent_suppress)
+	if (!dev_get_uevent_suppress(pdev))
 		kobject_uevent(&pdev->kobj, KOBJ_ADD);

However that should have been

-	if (!ddev->uevent_suppress)
+	if (!dev_get_uevent_suppress(ddev))

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ming Lei <tom.leiming@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/partitions/check.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 1a9c7878f864..ea4e6cb29e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -436,7 +436,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	rcu_assign_pointer(ptbl->part[partno], p);
 
 	/* suppress uevent if the disk supresses it */
-	if (!dev_get_uevent_suppress(pdev))
+	if (!dev_get_uevent_suppress(ddev))
 		kobject_uevent(&pdev->kobj, KOBJ_ADD);
 
 	return p;
-- 
cgit v1.2.3


From d0b6e04a4cd8360e3c9c419f7c30a3081a0c142a Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 13 Jul 2009 10:33:21 +0800
Subject: tracing/events: Move TRACE_SYSTEM outside of include guard

If TRACE_INCLDUE_FILE is defined, <trace/events/TRACE_INCLUDE_FILE.h>
will be included and compiled, otherwise it will be
<trace/events/TRACE_SYSTEM.h>

So TRACE_SYSTEM should be defined outside of #if proctection,
just like TRACE_INCLUDE_FILE.

Imaging this scenario:

 #include <trace/events/foo.h>
    -> TRACE_SYSTEM == foo
 ...
 #include <trace/events/bar.h>
    -> TRACE_SYSTEM == bar
 ...
 #define CREATE_TRACE_POINTS
 #include <trace/events/foo.h>
    -> TRACE_SYSTEM == bar !!!

and then bar.h will be included and compiled.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A5A9CF1.2010007@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/gfs2/trace_gfs2.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 98d6ef1c1dc0..148d55c14171 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -1,12 +1,11 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+
 #if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_GFS2_H
 
 #include <linux/tracepoint.h>
 
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM gfs2
-#define TRACE_INCLUDE_FILE trace_gfs2
-
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/dlmconstants.h>
@@ -403,5 +402,6 @@ TRACE_EVENT(gfs2_block_alloc,
 /* This part must be outside protection */
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_gfs2
 #include <trace/define_trace.h>
 
-- 
cgit v1.2.3


From e6b5d30104db5f34110678ecab14988f1f1eff63 Mon Sep 17 00:00:00 2001
From: Curt Wohlgemuth <curtw@google.com>
Date: Mon, 13 Jul 2009 09:07:20 -0400
Subject: ext4: Fix buffer head reference leak in no-journal mode

We found a problem with buffer head reference leaks when using an ext4
partition without a journal.  In particular, calls to ext4_forget() would
not to a brelse() on the input buffer head, which will cause pages they
belong to to not be reclaimable.

Further investigation showed that all places where ext4_journal_forget() and
ext4_journal_revoke() are called are subject to the same problem.  The patch
below changes __ext4_journal_forget/__ext4_journal_revoke to do an explicit
release of the buffer head when the journal handle isn't valid.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4_jbd2.c | 4 ++++
 fs/ext4/ext4_jbd2.h | 2 ++
 fs/ext4/inode.c     | 6 ++----
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index ad13a84644e1..eb27fd0f2ee8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
 			ext4_journal_abort_handle(where, __func__, bh,
 						  handle, err);
 	}
+	else
+		brelse(bh);
 	return err;
 }
 
@@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
 			ext4_journal_abort_handle(where, __func__, bh,
 						  handle, err);
 	}
+	else
+		brelse(bh);
 	return err;
 }
 
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d574a85aca56..139fb8cb87e4 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
 int __ext4_journal_get_write_access(const char *where, handle_t *handle,
 				struct buffer_head *bh);
 
+/* When called with an invalid handle, this will still do a put on the BH */
 int __ext4_journal_forget(const char *where, handle_t *handle,
 				struct buffer_head *bh);
 
+/* When called with an invalid handle, this will still do a put on the BH */
 int __ext4_journal_revoke(const char *where, handle_t *handle,
 				ext4_fsblk_t blocknr, struct buffer_head *bh);
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c98e3afea30a..f9c642b22efa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
  *
- * If the handle isn't valid we're not journaling so there's nothing to do.
+ * If the handle isn't valid we're not journaling, but we still need to
+ * call into ext4_journal_revoke() to put the buffer head.
  */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
-	if (!ext4_handle_valid(handle))
-		return 0;
-
 	might_sleep();
 
 	BUFFER_TRACE(bh, "enter");
-- 
cgit v1.2.3


From ac046f1d6121ccdda6db66bd88acd52418f489b2 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Mon, 13 Jul 2009 09:30:17 -0400
Subject: ext4: fix null handler of ioctls in no journal mode

The EXT4_IOC_GROUP_ADD and EXT4_IOC_GROUP_EXTEND ioctls should not
flush the journal in no_journal mode.  Otherwise, running resize2fs on
a mounted no_journal partition triggers the following error messages:

BUG: unable to handle kernel NULL pointer dereference at 00000014
IP: [<c039d282>] _spin_lock+0x8/0x19
*pde = 00000000
Oops: 0002 [#1] SMP

Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ioctl.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bb415408fdb6..01f149aea841 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -192,7 +192,7 @@ setversion_out:
 	case EXT4_IOC_GROUP_EXTEND: {
 		ext4_fsblk_t n_blocks_count;
 		struct super_block *sb = inode->i_sb;
-		int err, err2;
+		int err, err2=0;
 
 		if (!capable(CAP_SYS_RESOURCE))
 			return -EPERM;
@@ -205,9 +205,11 @@ setversion_out:
 			return err;
 
 		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
-		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		if (EXT4_SB(sb)->s_journal) {
+			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		}
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
@@ -252,7 +254,7 @@ setversion_out:
 	case EXT4_IOC_GROUP_ADD: {
 		struct ext4_new_group_data input;
 		struct super_block *sb = inode->i_sb;
-		int err, err2;
+		int err, err2=0;
 
 		if (!capable(CAP_SYS_RESOURCE))
 			return -EPERM;
@@ -266,9 +268,11 @@ setversion_out:
 			return err;
 
 		err = ext4_group_add(sb, &input);
-		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		if (EXT4_SB(sb)->s_journal) {
+			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		}
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
-- 
cgit v1.2.3


From 833576b362e15c38be3bfe43942cda693e56287c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 13 Jul 2009 09:45:52 -0400
Subject: ext4: Fix ext4_mb_initialize_context() to initialize all fields

Pavel Roskin pointed out that kmemcheck indicated that
ext4_mb_store_history() was accessing uninitialized values of
ac->ac_tail and ac->ac_buddy leading to garbage in the mballoc
history.  Fix this by initializing the entire structure to all zeros
first.

Also, two fields were getting doubly initialized by the caller of
ext4_mb_initialize_context, so remove them for efficiency's sake.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 2fcaf286f1de..cd258463e2a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4227,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ext4_get_group_no_and_offset(sb, goal, &group, &block);
 
 	/* set up allocation goals */
+	memset(ac, 0, sizeof(struct ext4_allocation_context));
 	ac->ac_b_ex.fe_logical = ar->logical;
-	ac->ac_b_ex.fe_group = 0;
-	ac->ac_b_ex.fe_start = 0;
-	ac->ac_b_ex.fe_len = 0;
 	ac->ac_status = AC_STATUS_CONTINUE;
-	ac->ac_groups_scanned = 0;
-	ac->ac_ex_scanned = 0;
-	ac->ac_found = 0;
 	ac->ac_sb = sb;
 	ac->ac_inode = ar->inode;
 	ac->ac_o_ex.fe_logical = ar->logical;
@@ -4245,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ac->ac_g_ex.fe_group = group;
 	ac->ac_g_ex.fe_start = block;
 	ac->ac_g_ex.fe_len = len;
-	ac->ac_f_ex.fe_len = 0;
 	ac->ac_flags = ar->flags;
-	ac->ac_2order = 0;
-	ac->ac_criteria = 0;
-	ac->ac_pa = NULL;
-	ac->ac_bitmap_page = NULL;
-	ac->ac_buddy_page = NULL;
-	ac->alloc_semp = NULL;
-	ac->ac_lg = NULL;
 
 	/* we have to define context: we'll we work with a file or
 	 * locality group. this is a policy, actually */
@@ -4521,10 +4508,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	}
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac) {
-		ac->ac_sb = sb;
-		ac->ac_inode = ar->inode;
-	} else {
+	if (!ac) {
 		ar->len = 0;
 		*errp = -ENOMEM;
 		goto out1;
-- 
cgit v1.2.3


From 96577c43827697ca1af5982fa256a34786d0c720 Mon Sep 17 00:00:00 2001
From: dingdinghua <dingdinghua85@gmail.com>
Date: Mon, 13 Jul 2009 17:55:35 -0400
Subject: jbd2: fix race between write_metadata_buffer and get_write_access

The function jbd2_journal_write_metadata_buffer() calls
jbd_unlock_bh_state(bh_in) too early; this could potentially allow
another thread to call get_write_access on the buffer head, modify the
data, and dirty it, and allowing the wrong data to be written into the
journal.  Fortunately, if we lose this race, the only time this will
actually cause filesystem corruption is if there is a system crash or
other unclean shutdown of the system before the next commit can take
place.

Signed-off-by: dingdinghua <dingdinghua85@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 7b545c3b3942..e378cb383979 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
 	struct jbd2_buffer_trigger_type *triggers;
+	journal_t *journal = transaction->t_journal;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
 	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
 		kunmap_atomic(mapped_data, KM_USER0);
 	}
 
-	/* keep subsequent assertions sane */
-	new_bh->b_state = 0;
-	init_buffer(new_bh, NULL, NULL);
-	atomic_set(&new_bh->b_count, 1);
-	jbd_unlock_bh_state(bh_in);
-
-	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
-
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_lock(&journal->j_list_lock);
+	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh_in);
+
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
 	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
 
-- 
cgit v1.2.3


From a89d63a159b1ba5833be2bef00adf8ad8caac8be Mon Sep 17 00:00:00 2001
From: Casey Dahlin <cdahlin@redhat.com>
Date: Tue, 14 Jul 2009 12:17:51 -0500
Subject: dlm: free socket in error exit path

In the tcp_connect_to_sock() error exit path, the socket
allocated at the top of the function was not being freed.

Signed-off-by: Casey Dahlin <cdahlin@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdb580a9c7a2..618a60f03886 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -902,7 +902,7 @@ static void tcp_connect_to_sock(struct connection *con)
 	int result = -EHOSTUNREACH;
 	struct sockaddr_storage saddr, src_addr;
 	int addr_len;
-	struct socket *sock;
+	struct socket *sock = NULL;
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
@@ -962,6 +962,8 @@ out_err:
 	if (con->sock) {
 		sock_release(con->sock);
 		con->sock = NULL;
+	} else if (sock) {
+		sock_release(sock);
 	}
 	/*
 	 * Some errors are fatal and this list might need adjusting. For other
-- 
cgit v1.2.3