From 16cb4b333c9e7a00ce3b1d74ec0c9b4c2e956910 Mon Sep 17 00:00:00 2001
From: Per Liden <per.liden@nospam.ericsson.com>
Date: Fri, 13 Jan 2006 22:22:22 +0100
Subject: [TIPC] Updated link priority macros

Added macros for min/default/max link priority in tipc_config.h.
Also renamed TIPC_NUM_LINK_PRI to TIPC_MEDIA_LINK_PRI since that
is a more accurate description of what it is used for.

Signed-off-by: Per Liden <per.liden@ericsson.com>
---
 include/linux/tipc_config.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index a52c8c64a5a3..33a653913d94 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -168,10 +168,13 @@
 #define TIPC_MAX_LINK_NAME	60	/* format = Z.C.N:interface-Z.C.N:interface */
 
 /*
- * Link priority limits (range from 0 to # priorities - 1)
+ * Link priority limits (min, default, max, media default)
  */
 
-#define TIPC_NUM_LINK_PRI 32
+#define TIPC_MIN_LINK_PRI	0
+#define TIPC_DEF_LINK_PRI	10
+#define TIPC_MAX_LINK_PRI	31
+#define TIPC_MEDIA_LINK_PRI	(TIPC_MAX_LINK_PRI + 1)
 
 /*
  * Link tolerance limits (min, default, max), in ms
-- 
cgit v1.2.3


From 33a9c4da5ab16192ef1e961d4c4e45c18031cd67 Mon Sep 17 00:00:00 2001
From: Per Liden <per.liden@ericsson.com>
Date: Mon, 16 Jan 2006 11:42:12 +0100
Subject: [TIPC] Move ethernet protocol id to linux/if_ether.h

Signed-off-by: Per Liden <per.liden@ericsson.com>
---
 include/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index fe26d431de87..7a92c1ce1457 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -72,6 +72,7 @@
 					 * over Ethernet
 					 */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
+#define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 
 /*
  *	Non DIX types. Won't clash for 1500 types.
-- 
cgit v1.2.3


From d9004eb466d03b7900ed432fecec6819012b4ed3 Mon Sep 17 00:00:00 2001
From: Alon Bar-Lev <alon.barlev@gmail.com>
Date: Wed, 18 Jan 2006 11:47:33 +0000
Subject: [SERIAL] Add 8250 support for Decision Computer International Co.
 PCCOM2

There is a new device which looks like this:

	Serial controller: Decision Computer International Co. PCCOM2 (rev 02) (prog-if 02 [16550])
	0700: 6666:0004 (rev 02) (prog-if 02)
	Flags: medium devsel, IRQ 177
	Memory at fe000000 (32-bit, non-prefetchable) [size=128]
	I/O ports at e880 [size=128]
	I/O ports at e400 [size=256]

It has two 16550A UARTs and is not listed in the kernel, although the
manufacturer claims that it is supported...

I've created the following patch; it only adds the new PCI ID and the
card to the repository, and it seems to work.

Signed-off-by: Alon Bar-Lev <alon.barlev@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5403257ae3e7..ecc1fc1f0f04 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1992,6 +1992,7 @@
 #define PCI_VENDOR_ID_DCI		0x6666
 #define PCI_DEVICE_ID_DCI_PCCOM4	0x0001
 #define PCI_DEVICE_ID_DCI_PCCOM8	0x0002
+#define PCI_DEVICE_ID_DCI_PCCOM2	0x0004
 
 #define PCI_VENDOR_ID_INTEL		0x8086
 #define PCI_DEVICE_ID_INTEL_EESSC	0x0008
-- 
cgit v1.2.3


From 053837fce7aa79025ed57656855df09f80175527 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 18 Jan 2006 17:42:27 -0800
Subject: [PATCH] mm: migration page refcounting fix

Migration code currently does not take a reference to target page
properly, so between unlocking the pte and trying to take a new
reference to the page with isolate_lru_page, anything could happen to
it.

Fix this by holding the pte lock until we get a chance to elevate the
refcount.

Other small cleanups while we're here.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm_inline.h | 21 ---------------------
 include/linux/swap.h      |  1 +
 2 files changed, 1 insertion(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 49cc68af01f8..8ac854f7f190 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
 	}
 }
 
-/*
- * Isolate one page from the LRU lists.
- *
- * - zone->lru_lock must be held
- */
-static inline int __isolate_lru_page(struct page *page)
-{
-	if (unlikely(!TestClearPageLRU(page)))
-		return 0;
-
-	if (get_page_testone(page)) {
-		/*
-		 * It is being freed elsewhere
-		 */
-		__put_page(page);
-		SetPageLRU(page);
-		return -ENOENT;
-	}
-
-	return 1;
-}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e92054d6530b..d01f7efb0f2c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
+extern int lru_add_drain_all(void);
 extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
-- 
cgit v1.2.3


From 9eeff2395e3cfd05c9b2e6074ff943a34b0c5c21 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 18 Jan 2006 17:42:31 -0800
Subject: [PATCH] Zone reclaim: Reclaim logic

Some bits for zone reclaim exist in 2.6.15 but they are not usable.  This
patch fixes them up, removes unused code and makes zone reclaim usable.

Zone reclaim allows the reclaiming of pages from a zone if the number of
free pages falls below the watermarks even if other zones still have enough
pages available.  Zone reclaim is of particular importance for NUMA
machines.  It can be more beneficial to reclaim a page than taking the
performance penalties that come with allocating a page on a remote zone.

Zone reclaim is enabled if the maximum distance to another node is higher
than RECLAIM_DISTANCE, which may be defined by an arch.  By default
RECLAIM_DISTANCE is 20.  20 is the distance to another node in the same
component (enclosure or motherboard) on IA64.  The meaning of the NUMA
distance information seems to vary by arch.

If zone reclaim is not successful then no further reclaim attempts will
occur for a certain time period (ZONE_RECLAIM_INTERVAL).

This patch was discussed before. See

http://marc.theaimsgroup.com/?l=linux-kernel&m=113519961504207&w=2
http://marc.theaimsgroup.com/?l=linux-kernel&m=113408418232531&w=2
http://marc.theaimsgroup.com/?l=linux-kernel&m=113389027420032&w=2
http://marc.theaimsgroup.com/?l=linux-kernel&m=113380938612205&w=2

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h   | 12 +++++++-----
 include/linux/swap.h     | 11 +++++++++++
 include/linux/topology.h |  8 ++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 34cbefd2ebde..93a849f742db 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -149,14 +149,16 @@ struct zone {
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
 
-	/*
-	 * Does the allocator try to reclaim pages from the zone as soon
-	 * as it fails a watermark_ok() in __alloc_pages?
-	 */
-	int			reclaim_pages;
 	/* A count of how many reclaimers are scanning this zone */
 	atomic_t		reclaim_in_progress;
 
+	/*
+	 * timestamp (in jiffies) of the last zone reclaim that did not
+	 * result in freeing of pages. This is used to avoid repeated scans
+	 * if all memory in the zone is in use.
+	 */
+	unsigned long		last_unsuccessful_zone_reclaim;
+
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
 	 * defined as the scanning priority at which we achieved our reclaim
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d01f7efb0f2c..4a99e4a7fbf3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -176,6 +176,17 @@ extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
+#ifdef CONFIG_NUMA
+extern int zone_reclaim_mode;
+extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+#else
+#define zone_reclaim_mode 0
+static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 315a5163d6a0..e8eb0040ce3a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -56,6 +56,14 @@
 #define REMOTE_DISTANCE		20
 #define node_distance(from,to)	((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
 #endif
+#ifndef RECLAIM_DISTANCE
+/*
+ * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
+ * (in whatever arch specific measurement units returned by node_distance())
+ * then switch on zone reclaim on boot.
+ */
+#define RECLAIM_DISTANCE 20
+#endif
 #ifndef PENALTY_FOR_NODE_WITH_CPUS
 #define PENALTY_FOR_NODE_WITH_CPUS	(1)
 #endif
-- 
cgit v1.2.3


From 1743660b911bfb849b1fb33830522254561b9f9b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 18 Jan 2006 17:42:32 -0800
Subject: [PATCH] Zone reclaim: proc override

proc support for zone reclaim

This patch creates a proc entry /proc/sys/vm/zone_reclaim_mode that may be
used to override the automatic determination of the zone reclaim made on
bootup.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7f472127b7b5..8352a7ce5895 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -182,6 +182,7 @@ enum
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
 	VM_DROP_PAGECACHE=29,	/* int: nuke lots of pagecache */
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
+	VM_ZONE_RECLAIM_MODE=31,/* reclaim local zone memory before going off node */
 };
 
 
-- 
cgit v1.2.3


From dc85da15d42b0efc792b0f5eab774dc5dbc1ceec Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Wed, 18 Jan 2006 17:42:36 -0800
Subject: [PATCH] NUMA policies in the slab allocator V2

This patch fixes a regression in 2.6.14 against 2.6.13 that causes an
imbalance in memory allocation during bootup.

The slab allocator in 2.6.13 is not numa aware and simply calls
alloc_pages().  This means that memory policies may control the behavior of
alloc_pages().  During bootup the memory policy is set to MPOL_INTERLEAVE
resulting in the spreading out of allocations during bootup over all
available nodes.  The slab allocator in 2.6.13 has only a single list of
slab pages.  As a result the per cpu slab cache and the spinlock controlled
page lists may contain slab entries from off node memory.  The slab
allocator in 2.6.13 makes no effort to discern the locality of an entry on
its lists.

The NUMA aware slab allocator in 2.6.14 controls locality of the slab pages
explicitly by calling alloc_pages_node().  The NUMA slab allocator manages
slab entries by having lists of available slab pages for each node.  The
per cpu slab cache can only contain slab entries associated with the node
local to the processor.  This guarantees that the default allocation mode
of the slab allocator always assigns local memory if available.

Setting MPOL_INTERLEAVE as a default policy during bootup has no effect
anymore.  In 2.6.14 all node unspecific slab allocations are performed on
the boot processor.  This means that most of the key data structures are
allocated on one node.  Most processors will have to refer to these
structures making the boot node a potential bottleneck.  This may reduce
performance and cause unnecessary memory pressure on the boot node.

This patch implements NUMA policies in the slab layer.  The slab allocator
itself needs to apply NUMA memory policies explicitly, since the NUMA slab
allocator no longer lets the page allocator control locality.

The check for policies is made directly at the beginning of __cache_alloc
using current->mempolicy.  The memory policy is already frequently checked
by the page allocator (alloc_page_vma() and alloc_page_current()).  So it
is highly likely that the cacheline is present.  For MPOL_INTERLEAVE
kmalloc() will spread out each request to one node after another so that an
equal distribution of allocations can be obtained during bootup.

It is not possible to push the policy check to lower layers of the NUMA
slab allocator since the per cpu caches are now only containing slab
entries from the current node.  If the policy says that the local node is
not to be preferred or forbidden then there is no point in checking the
slab cache or local list of slab pages.  The allocation better be directed
immediately to the lists containing slab entries for the allowed set of
nodes.

This way of applying policy also fixes another strange behavior in 2.6.13.
alloc_pages() is controlled by the memory allocation policy of the current
process.  It could therefore be that one process is running with
MPOL_INTERLEAVE and would, for example, obtain a new page following that policy
since no slab entries are in the lists anymore.  A page can typically be
used for multiple slab entries, but let's say that the current process is
only using one.  The other entries are then added to the slab lists.  These
are now non-local entries in the slab lists despite the possible
availability of local pages that would provide faster access and increase
the performance of the application.

Another process without MPOL_INTERLEAVE may now run and expect a local slab
entry from kmalloc().  However, there are still these free slab entries
from the off node page obtained from the other process via MPOL_INTERLEAVE
in the cache.  The process will then get an off node slab entry although
other slab entries may be available that are local to that process.  This
means that the policy of one process may contaminate the locality of the
slab caches for other processes.

This patch in effect ensures that a per process policy is followed for the
allocation of slab entries and that there cannot be a memory policy
influence from one process to another.  A process with default policy will
always get a local slab entry if one is available.  And the process using
memory policies will get its memory arranged as requested.  Off-node slab
allocation will require the use of spinlocks and will make the use of per
cpu caches not possible.  A process using memory policies to redirect
allocations offnode will have to cope with additional lock overhead in
addition to the latency added by the need to access a remote slab entry.

Changes V1->V2
- Remove #ifdef CONFIG_NUMA by moving forward declaration into
  prior #ifdef CONFIG_NUMA section.

- Give the function determining the node number to use a saner
  name.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mempolicy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index d6a53ed6ab6c..bbd2221923c3 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,6 +159,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr);
+extern unsigned slab_node(struct mempolicy *policy);
 
 extern int policy_zone;
 
-- 
cgit v1.2.3


From 5131cf154ad1c6e584efa58d17a469d0b80f49bd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Jan 2006 17:43:04 -0800
Subject: [PATCH] add missing syscall declarations

All standard system calls should be declared in include/linux/syscalls.h.

Add some of the new additions that were previously missed.

Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/syscalls.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3eed47347013..e666d6070569 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -510,9 +510,24 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
 asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
 asmlinkage long sys_ioprio_get(int which, int who);
 asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
-					unsigned long maxnode);
+				unsigned long maxnode);
 asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
-			const unsigned long __user *from, const unsigned long __user *to);
+				const unsigned long __user *from,
+				const unsigned long __user *to);
+asmlinkage long sys_mbind(unsigned long start, unsigned long len,
+				unsigned long mode,
+				unsigned long __user *nmask,
+				unsigned long maxnode,
+				unsigned flags);
+asmlinkage long sys_get_mempolicy(int __user *policy,
+				unsigned long __user *nmask,
+				unsigned long maxnode,
+				unsigned long addr, unsigned long flags);
+
+asmlinkage long sys_inotify_init(void);
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
+					u32 mask);
+asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
 
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
-- 
cgit v1.2.3


From f193fbab2e4710f629e7c1859d4908646b47b126 Mon Sep 17 00:00:00 2001
From: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Date: Wed, 18 Jan 2006 17:43:13 -0800
Subject: [PATCH] nfsd: check error status from nfsd_sync_dir

Change nfsd_sync_dir to return an error if ->sync fails, and pass that error
up through the stack.  This involves a number of rearrangements of error
paths, and care to distinguish between Linux -errno numbers and NFSERR
numbers.

In the 'create' routines, we continue with the 'setattr' even if a previous
sync_dir failed.

This patch is quite different from Takashi's in a few ways, but there is still
a strong lineage.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/nfsd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 51c231a1e5a6..ec7c2e872d72 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -124,7 +124,7 @@ int		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
-void		nfsd_sync_dir(struct dentry *dp);
+int		nfsd_sync_dir(struct dentry *dp);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-- 
cgit v1.2.3


From 1918e341383ab787d6c5b17200f4ed901b10c777 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 18 Jan 2006 17:43:16 -0800
Subject: [PATCH] svcrpc: save and restore the daddr field when request
 deferred

The server code currently keeps track of the destination address on every
request so that it can reply using the same address.  However we forget to do
that in the case of a deferred request.  Remedy this oversight.  From folks
at PolyServe.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sunrpc/svc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e4086ec8b952..50cab2a09f28 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -246,6 +246,7 @@ struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
 	struct sockaddr_in	addr;
 	struct svc_sock		*svsk;	/* where reply must go */
+	u32			daddr;	/* where reply must come from */
 	struct cache_deferred_req handle;
 	int			argslen;
 	u32			args[0];
-- 
cgit v1.2.3


From 3a65588adc4401622b204caa897123e16a4a0318 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 18 Jan 2006 17:43:19 -0800
Subject: [PATCH] nfsd4: rename lk_stateowner

One of the things that's confusing about nfsd4_lock is that the lk_stateowner
field could be set to either of two different lockowners: the open owner or
the lock owner.  Rename to lk_replay_owner and add a comment to make it clear
that it's used for whichever stateowner has its sequence id bumped for replay
detection.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/xdr4.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 8903688890ce..77adba7d2281 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -145,8 +145,9 @@ struct nfsd4_lock {
 		} ok;
 		struct nfsd4_lock_denied        denied;
 	} u;
-
-	struct nfs4_stateowner *lk_stateowner;
+	/* The lk_replay_owner is the open owner in the open_to_lock_owner
+	 * case and the lock owner otherwise: */
+	struct nfs4_stateowner *lk_replay_owner;
 };
 #define lk_new_open_seqid       v.new.open_seqid
 #define lk_new_open_stateid     v.new.open_stateid
-- 
cgit v1.2.3


From 5590ff0d5528b60153c0b4e7b771472b5a95e297 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 18 Jan 2006 17:43:53 -0800
Subject: [PATCH] vfs: *at functions: core

Here is a series of patches which introduce in total 13 new system calls
which take a file descriptor/filename pair instead of a single file
name.  These functions, openat etc, have been discussed on numerous
occasions.  They are needed to implement race-free filesystem traversal,
they are necessary to implement a virtual per-thread current working
directory (think multi-threaded backup software), etc.

We have in glibc today implementations of the interfaces which use the
/proc/self/fd magic.  But this code is rather expensive.  Here are some
results (similar to what Jim Meyering posted before).

The test creates a deep directory hierarchy on a tmpfs filesystem.  Then
rm -fr is used to remove all directories.  Without syscall support I get
this:

real    0m31.921s
user    0m0.688s
sys     0m31.234s

With syscall support the results are much better:

real    0m20.699s
user    0m0.536s
sys     0m20.149s

The interfaces are for obvious reasons currently not much used.  But they'll
be used.  coreutils (and Jeff's posixutils) are already using them.
Furthermore, code like ftw/fts in libc (maybe even glob) will also start using
them.  I expect a patch to make to follow soon.  Every program which is walking
the filesystem tree will benefit.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@ftp.linux.org.uk>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fcntl.h | 7 +++++++
 include/linux/fs.h    | 7 +++++--
 include/linux/namei.h | 7 ++++---
 include/linux/time.h  | 2 +-
 4 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 8a7c82151de9..c52a63755fdd 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -23,6 +23,13 @@
 #define DN_ATTRIB	0x00000020	/* File changed attibutes */
 #define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
 
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+
 #ifdef __KERNEL__
 
 #ifndef force_o_largefile
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b77f2608eef9..84bb449b9b01 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1340,7 +1340,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
-extern long do_sys_open(const char __user *filename, int flags, int mode);
+extern long do_sys_open(int fdf, const char __user *filename, int flags,
+			int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
 extern int filp_close(struct file *, fl_owner_t id);
@@ -1479,7 +1480,7 @@ static inline void allow_write_access(struct file *file)
 }
 extern int do_pipe(int *);
 
-extern int open_namei(const char *, int, int, struct nameidata *);
+extern int open_namei(int dfd, const char *, int, int, struct nameidata *);
 extern int may_open(struct nameidata *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
@@ -1677,6 +1678,8 @@ extern int vfs_readdir(struct file *, filldir_t, void *);
 
 extern int vfs_stat(char __user *, struct kstat *);
 extern int vfs_lstat(char __user *, struct kstat *);
+extern int vfs_stat_fd(int dfd, char __user *, struct kstat *);
+extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index b699e427c00c..e6698013e4d0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -56,10 +56,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_ACCESS		(0x0400)
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
+extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
 #define user_path_walk(name,nd) \
-	__user_walk(name, LOOKUP_FOLLOW, nd)
+	__user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
-	__user_walk(name, 0, nd)
+	__user_walk_fd(AT_FDCWD, name, 0, nd)
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
@@ -67,7 +68,7 @@ extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
 extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
-extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags);
+extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags);
 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *));
 extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
diff --git a/include/linux/time.h b/include/linux/time.h
index f2aca7ec6325..614dd8465839 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -74,7 +74,7 @@ extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
 #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
-extern long do_utimes(char __user *filename, struct timeval *times);
+extern long do_utimes(int dfd, char __user *filename, struct timeval *times);
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
-- 
cgit v1.2.3


From 150256d8aadb3a337c31efa9e175cbd25bf06b06 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Wed, 18 Jan 2006 17:43:57 -0800
Subject: [PATCH] Generic sys_rt_sigsuspend()

The TIF_RESTORE_SIGMASK flag allows us to have a generic implementation of
sys_rt_sigsuspend() instead of duplicating it for each architecture.  This
provides such an implementation and makes arch/powerpc use it.

It also tidies up the ppc32 sys_sigsuspend() to use TIF_RESTORE_SIGMASK.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2df1a1a2fee5..0cfcd1c7865e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -809,6 +809,7 @@ struct task_struct {
 	struct sighand_struct *sighand;
 
 	sigset_t blocked, real_blocked;
+	sigset_t saved_sigmask;		/* To be restored with TIF_RESTORE_SIGMASK */
 	struct sigpending pending;
 
 	unsigned long sas_ss_sp;
-- 
cgit v1.2.3


From 9f72949f679df06021c9e43886c9191494fdb007 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Wed, 18 Jan 2006 17:44:05 -0800
Subject: [PATCH] Add pselect/ppoll system call implementation

The following implementation of ppoll() and pselect() system calls
depends on the architecture providing a TIF_RESTORE_SIGMASK flag in the
thread_info.

These system calls have to change the signal mask during their
operation, and signal handlers must be invoked using the new, temporary
signal mask. The old signal mask must be restored either upon successful
exit from the system call, or upon returning from the invoked signal
handler if the system call is interrupted. We can't simply restore the
original signal mask and return to userspace, since the restored signal
mask may actually block the signal which interrupted the system call.

The TIF_RESTORE_SIGMASK flag deals with this by causing the syscall exit
path to trap into do_signal() just as TIF_SIGPENDING does, and by
causing do_signal() to use the saved signal mask instead of the current
signal mask when setting up the stack frame for the signal handler -- or
by causing do_signal() to simply restore the saved signal mask in the
case where there is no handler to be invoked.

The first patch implements the sys_pselect() and sys_ppoll() system
calls, which are present only if TIF_RESTORE_SIGMASK is defined. That
#ifdef should go away in time when all architectures have implemented
it. The second patch implements TIF_RESTORE_SIGMASK for the PowerPC
kernel (in the -mm tree), and the third patch then removes the
arch-specific implementations of sys_rt_sigsuspend() and replaces them
with generic versions using the same trick.

The fourth and fifth patches, provided by David Howells, implement
TIF_RESTORE_SIGMASK for FR-V and i386 respectively, and the sixth patch
adds the syscalls to the i386 syscall table.

This patch:

Add the pselect() and ppoll() system calls, providing core routines usable by
the original select() and poll() system calls and also the new calls (with
their semantics w.r.t timeouts).

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/poll.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/poll.h b/include/linux/poll.h
index f6da702088f4..8e8f6098508a 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -92,7 +92,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset)
 	memset(fdset, 0, FDS_BYTES(nr));
 }
 
-extern int do_select(int n, fd_set_bits *fds, long *timeout);
+#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
+
+extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
+extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
+		       s64 *timeout);
 
 #endif /* KERNEL */
 
-- 
cgit v1.2.3


From 4f2d7680cb1ac5c5a70f3ba2447d5aa5c0a1643a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 19 Jan 2006 16:58:37 -0800
Subject: [NETFILTER] x_tables: Make XT_ALIGN align as strictly as necessary.

Or else we break on ppc32 and other 32-bit platforms.

Based upon a patch from Harald Welte.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 472f04834809..59ff6c430cf6 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -19,7 +19,7 @@ struct xt_get_revision
 /* For standard target */
 #define XT_RETURN (-NF_REPEAT - 1)
 
-#define XT_ALIGN(s) (((s) + (__alignof__(void *)-1)) & ~(__alignof__(void *)-1))
+#define XT_ALIGN(s) (((s) + (__alignof__(u_int64_t)-1)) & ~(__alignof__(u_int64_t)-1))
 
 /* Standard return verdict, or do jump. */
 #define XT_STANDARD_TARGET ""
-- 
cgit v1.2.3