From 986a80d5c154808cc78170584670324a22fd8219 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Thu, 16 Jun 2005 15:14:00 -0700
Subject: [PATCH] avoid signed vs unsigned comparison in efi_range_is_wc()

warning when building with gcc -W :

 include/linux/efi.h: In function `efi_range_is_wc':
 include/linux/efi.h:320: warning: comparison between signed and unsigned

It looks to me like a significantly large 'len' passed in could cause the
loop to never end. Isn't it safer to make 'i' an unsigned long as well?
Like this little patch below (which of course also kills the warning) :

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/efi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 047e7222df7a..73781ec165b4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -315,7 +315,7 @@ extern struct efi_memory_map memmap;
  */
 static inline int efi_range_is_wc(unsigned long start, unsigned long len)
 {
-	int i;
+	unsigned long i;
 
 	for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) {
 		unsigned long paddr = __pa(start + i);
-- 
cgit v1.2.3


From dd87147eed934eaff92869f3d158697c7239d1d2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 20 Jun 2005 13:21:43 -0700
Subject: [IPSEC]: Add XFRM_STATE_NOPMTUDISC flag

This patch adds the flag XFRM_STATE_NOPMTUDISC for xfrm states.  It is
similar to the nopmtudisc on IPIP/GRE tunnels.  It only has an effect
on IPv4 tunnel mode states.  For these states, it will ensure that the
DF flag is always cleared.

This is primarily useful to work around ICMP blackholes.

In future this flag could also allow a larger MTU to be set within the
tunnel just like IPIP/GRE tunnels.  This could be useful for short haul
tunnels where temporary fragmentation outside the tunnel is desired over
smaller fragments inside the tunnel.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: James Morris <jmorris@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h | 1 +
 include/linux/xfrm.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index e6b519220245..724066778aff 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -245,6 +245,7 @@ struct sadb_x_nat_t_port {
 
 /* Security Association flags */
 #define SADB_SAFLAGS_PFS	1
+#define SADB_SAFLAGS_NOPMTUDISC	0x20000000
 #define SADB_SAFLAGS_DECAP_DSCP	0x40000000
 #define SADB_SAFLAGS_NOECN	0x80000000
 
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index d68391a9b9f3..f0d423300d84 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -196,6 +196,7 @@ struct xfrm_usersa_info {
 	__u8				flags;
 #define XFRM_STATE_NOECN	1
 #define XFRM_STATE_DECAP_DSCP	2
+#define XFRM_STATE_NOPMTUDISC	4
 };
 
 struct xfrm_usersa_id {
-- 
cgit v1.2.3


From f6e276ee67c0ac9efafd24bc6f7a84aa359656df Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 20 Jun 2005 13:32:05 -0700
Subject: [ATALK]: endian annotations

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 31d3fc25ccbd..09a1451c1159 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -20,7 +20,7 @@
 #define SIOCATALKDIFADDR       (SIOCPROTOPRIVATE + 0)
 
 struct atalk_addr {
-	__u16	s_net;
+	__be16	s_net;
 	__u8	s_node;
 };
 
@@ -33,8 +33,8 @@ struct sockaddr_at {
 
 struct atalk_netrange {
 	__u8	nr_phase;
-	__u16	nr_firstnet;
-	__u16	nr_lastnet;
+	__be16	nr_firstnet;
+	__be16	nr_lastnet;
 };
 
 #ifdef __KERNEL__
@@ -70,8 +70,8 @@ struct atalk_iface {
 struct atalk_sock {
 	/* struct sock has to be the first member of atalk_sock */
 	struct sock	sk;
-	unsigned short	dest_net;
-	unsigned short	src_net;
+	__be16		dest_net;
+	__be16		src_net;
 	unsigned char	dest_node;
 	unsigned char	src_node;
 	unsigned char	dest_port;
@@ -95,9 +95,9 @@ struct ddpehdr {
 		deh_hops:4,
 		deh_len:10;
 #endif
-	__u16	deh_sum;
-	__u16	deh_dnet;
-	__u16	deh_snet;
+	__be16	deh_sum;
+	__be16	deh_dnet;
+	__be16	deh_snet;
 	__u8	deh_dnode;
 	__u8	deh_snode;
 	__u8	deh_dport;
@@ -142,24 +142,24 @@ struct ddpshdr {
 
 /* AppleTalk AARP headers */
 struct elapaarp {
-	__u16	hw_type;
+	__be16	hw_type;
 #define AARP_HW_TYPE_ETHERNET		1
 #define AARP_HW_TYPE_TOKENRING		2
-	__u16	pa_type;
+	__be16	pa_type;
 	__u8	hw_len;
 	__u8	pa_len;
 #define AARP_PA_ALEN			4
-	__u16	function;
+	__be16	function;
 #define AARP_REQUEST			1
 #define AARP_REPLY			2
 #define AARP_PROBE			3
 	__u8	hw_src[ETH_ALEN]	__attribute__ ((packed));
 	__u8	pa_src_zero		__attribute__ ((packed));
-	__u16	pa_src_net		__attribute__ ((packed));
+	__be16	pa_src_net		__attribute__ ((packed));
 	__u8	pa_src_node		__attribute__ ((packed));
 	__u8	hw_dst[ETH_ALEN]	__attribute__ ((packed));
 	__u8	pa_dst_zero		__attribute__ ((packed));
-	__u16	pa_dst_net		__attribute__ ((packed));
+	__be16	pa_dst_net		__attribute__ ((packed));
 	__u8	pa_dst_node		__attribute__ ((packed));	
 };
 
-- 
cgit v1.2.3


From 246955fe4c38bd706ae30e37c64892c94213775d Mon Sep 17 00:00:00 2001
From: Robert Olsson <Robert.Olsson@data.slu.se>
Date: Mon, 20 Jun 2005 13:36:39 -0700
Subject: [NETLINK]: fib_lookup() via netlink

Below is a more generic patch to do fib_lookup via netlink. For others
we should say that we discussed this as a way to verify route selection.
It's also possible there are others uses for this.

In short the fist half of struct fib_result_nl is filled in by caller
and netlink call fills in the other half and returns it.

In case anyone is interested there is a corresponding user app to compare
the full routing table this was used to test implementation of the LC-trie.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e38407a23d04..561d4dc75836 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -14,6 +14,7 @@
 #define NETLINK_SELINUX		7	/* SELinux event notifications */
 #define NETLINK_ARPD		8
 #define NETLINK_AUDIT		9	/* auditing */
+#define NETLINK_FIB_LOOKUP	10	
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
-- 
cgit v1.2.3


From e3a15db2415579d5136b9ba9b52fe27c66da8780 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:31:08 -0500
Subject: [PATCH] sysfs_{create|remove}_link should take const char *

sysfs: make sysfs_{create|remove}_link to take const char * name.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 38b58b30814a..931b5aaaf380 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -105,11 +105,11 @@ sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode);
 extern void
 sysfs_remove_file(struct kobject *, const struct attribute *);
 
-extern int 
-sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name);
+extern int
+sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name);
 
 extern void
-sysfs_remove_link(struct kobject *, char * name);
+sysfs_remove_link(struct kobject *, const char * name);
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
@@ -153,12 +153,12 @@ static inline void sysfs_remove_file(struct kobject * k, const struct attribute
 	;
 }
 
-static inline int sysfs_create_link(struct kobject * k, struct kobject * t, char * n)
+static inline int sysfs_create_link(struct kobject * k, struct kobject * t, const char * n)
 {
 	return 0;
 }
 
-static inline void sysfs_remove_link(struct kobject * k, char * name)
+static inline void sysfs_remove_link(struct kobject * k, const char * name)
 {
 	;
 }
-- 
cgit v1.2.3


From f3b4f3c6dec04c6c8261fe22645f07b39976595a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:32:00 -0500
Subject: [PATCH] Make kobject's name be const char *

kobject: make kobject's name const char * since users should not
	 attempt to change it (except by calling kobject_rename).

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 765d660d3bea..76dc67245c0c 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -33,7 +33,7 @@
 extern u64 hotplug_seqnum;
 
 struct kobject {
-	char			* k_name;
+	const char		* k_name;
 	char			name[KOBJ_NAME_LEN];
 	struct kref		kref;
 	struct list_head	entry;
@@ -46,7 +46,7 @@ struct kobject {
 extern int kobject_set_name(struct kobject *, const char *, ...)
 	__attribute__((format(printf,2,3)));
 
-static inline char * kobject_name(struct kobject * kobj)
+static inline const char * kobject_name(const struct kobject * kobj)
 {
 	return kobj->k_name;
 }
@@ -57,7 +57,7 @@ extern void kobject_cleanup(struct kobject *);
 extern int kobject_add(struct kobject *);
 extern void kobject_del(struct kobject *);
 
-extern int kobject_rename(struct kobject *, char *new_name);
+extern int kobject_rename(struct kobject *, const char *new_name);
 
 extern int kobject_register(struct kobject *);
 extern void kobject_unregister(struct kobject *);
-- 
cgit v1.2.3


From 419cab3fc69588ebe35b845cc3a584ae172463de Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:32:54 -0500
Subject: [PATCH] kset_hotplug_ops->name shoudl return const char *

kobject: change name() method in kset_hotplug_ops return const char *
	 since users shoudl not try to modify returned data.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 76dc67245c0c..3b22304f12fd 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -94,7 +94,7 @@ struct kobj_type {
  */
 struct kset_hotplug_ops {
 	int (*filter)(struct kset *kset, struct kobject *kobj);
-	char *(*name)(struct kset *kset, struct kobject *kobj);
+	const char *(*name)(struct kset *kset, struct kobject *kobj);
 	int (*hotplug)(struct kset *kset, struct kobject *kobj, char **envp,
 			int num_envp, char *buffer, int buffer_size);
 };
-- 
cgit v1.2.3


From 8d790d74085833ba2a3e84b5bcd683be4981c29a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:34:05 -0500
Subject: [PATCH] make driver's name be const char *

Driver core:
  change driver's, bus's, class's and platform device's names
  to be const char * so one can use
            const char *drv_name = "asdfg";
  when initializing structures.
  Also kill couple of whitespaces.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index df94c0de53f2..fa9e6ca08f5a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -47,7 +47,7 @@ struct class_device;
 struct class_simple;
 
 struct bus_type {
-	char			* name;
+	const char		* name;
 
 	struct subsystem	subsys;
 	struct kset		drivers;
@@ -98,17 +98,17 @@ extern int bus_create_file(struct bus_type *, struct bus_attribute *);
 extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
 
 struct device_driver {
-	char			* name;
+	const char		* name;
 	struct bus_type		* bus;
 
 	struct completion	unloaded;
 	struct kobject		kobj;
 	struct list_head	devices;
 
-	struct module 		* owner;
+	struct module		* owner;
 
 	int	(*probe)	(struct device * dev);
-	int 	(*remove)	(struct device * dev);
+	int	(*remove)	(struct device * dev);
 	void	(*shutdown)	(struct device * dev);
 	int	(*suspend)	(struct device * dev, pm_message_t state, u32 level);
 	int	(*resume)	(struct device * dev, u32 level);
@@ -142,7 +142,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute *
  * device classes
  */
 struct class {
-	char			* name;
+	const char		* name;
 
 	struct subsystem	subsys;
 	struct list_head	children;
@@ -366,7 +366,7 @@ extern struct device *device_find(const char *name, struct bus_type *bus);
 /* drivers/base/platform.c */
 
 struct platform_device {
-	char		* name;
+	const char	* name;
 	u32		id;
 	struct device	dev;
 	u32		num_resources;
-- 
cgit v1.2.3


From d48593bf208e0d046c35fb0707ae5b23fef8c4ff Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Fri, 29 Apr 2005 00:58:46 -0500
Subject: [PATCH] Make attributes names const char *

sysfs: make attributes and attribute_group's names const char *

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 931b5aaaf380..d9cd2d31d377 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -16,13 +16,13 @@ struct kobject;
 struct module;
 
 struct attribute {
-	char			* name;
+	const char		* name;
 	struct module 		* owner;
 	mode_t			mode;
 };
 
 struct attribute_group {
-	char			* name;
+	const char		* name;
 	struct attribute	** attrs;
 };
 
-- 
cgit v1.2.3


From e9ba6365fd4f0d9e7d022c883bd044fbaa48257f Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 11:54:21 -0800
Subject: [PATCH] CLASS: move a "simple" class logic into the class core.

One step on improving the class api so that it can not be used incorrectly.
This also fixes the module owner issue with the dev files that happened when
the devt logic moved to the class core.

Based on a patch originally written by Kay Sievers <kay.sievers@vrfy.org>

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index fa9e6ca08f5a..73250d01c01f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -143,6 +143,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute *
  */
 struct class {
 	const char		* name;
+	struct module		* owner;
 
 	struct subsystem	subsys;
 	struct list_head	children;
@@ -185,6 +186,7 @@ struct class_device {
 	struct kobject		kobj;
 	struct class		* class;	/* required */
 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+	struct class_device_attribute *devt_attr;
 	struct device		* dev;		/* not necessary, but nice to have */
 	void			* class_data;	/* class-specific data */
 
@@ -245,6 +247,13 @@ struct class_interface {
 extern int class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
+extern struct class *class_create(struct module *owner, char *name);
+extern void class_destroy(struct class *cls);
+extern struct class_device *class_device_create(struct class *cls, dev_t devt,
+						struct device *device, char *fmt, ...)
+					__attribute__((format(printf,4,5)));
+extern void class_device_destroy(struct class *cls, dev_t devt);
+
 /* interface for class simple stuff */
 extern struct class_simple *class_simple_create(struct module *owner, char *name);
 extern void class_simple_destroy(struct class_simple *cs);
-- 
cgit v1.2.3


From 1235686f6e67cf30c460eb77d90a6cb4be57b92f Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 14:26:30 -0800
Subject: [PATCH] INPUT: move to use the new class code, instead of
 class_simple

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/input.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 72731d7d189e..9d9598ed760d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1015,7 +1015,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
 	dev->absbit[LONG(axis)] |= BIT(axis);
 }
 
-extern struct class_simple *input_class;
+extern struct class *input_class;
 
 #endif
 #endif
-- 
cgit v1.2.3


From 8561b10f6e7ef0a085709ffc844f74130a067abe Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 15:10:13 -0800
Subject: [PATCH] USB: move the usb hcd code to use the new class code.

This moves a kref into the main hcd structure, which detaches it from
the class device structure.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2d1ac5058534..3d508bf08402 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -287,15 +287,14 @@ struct usb_bus {
 
 	struct dentry *usbfs_dentry;	/* usbfs dentry entry for the bus */
 
-	struct class_device class_dev;	/* class device for this bus */
+	struct class_device *class_dev;	/* class device for this bus */
+	struct kref kref;		/* handles reference counting this bus */
 	void (*release)(struct usb_bus *bus);	/* function to destroy this bus's memory */
 #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
 #endif
 };
-#define	to_usb_bus(d) container_of(d, struct usb_bus, class_dev)
-
 
 /* -------------------------------------------------------------------------- */
 
-- 
cgit v1.2.3


From cd987d38cc59053e0bab8150ffaca33b109067f3 Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Wed, 23 Mar 2005 11:12:38 -0800
Subject: [PATCH] class: remove class_simple code, as no one in the tree is
 using it anymore.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 73250d01c01f..68a95358013d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -44,7 +44,6 @@ struct device;
 struct device_driver;
 struct class;
 struct class_device;
-struct class_simple;
 
 struct bus_type {
 	const char		* name;
@@ -254,15 +253,6 @@ extern struct class_device *class_device_create(struct class *cls, dev_t devt,
 					__attribute__((format(printf,4,5)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-/* interface for class simple stuff */
-extern struct class_simple *class_simple_create(struct module *owner, char *name);
-extern void class_simple_destroy(struct class_simple *cs);
-extern struct class_device *class_simple_device_add(struct class_simple *cs, dev_t dev, struct device *device, const char *fmt, ...)
-	__attribute__((format(printf,4,5)));
-extern int class_simple_set_hotplug(struct class_simple *, 
-	int (*hotplug)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size));
-extern void class_simple_device_remove(dev_t dev);
-
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-- 
cgit v1.2.3


From af70316af182f4716cc5eec7e0d27fc731d164bd Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 10:41:04 -0800
Subject: [PATCH] Add a semaphore to struct device to synchronize calls to its
 driver.

This adds a per-device semaphore that is taken before every call from the core to a
driver method. This prevents e.g. simultaneous calls to the ->suspend() or ->resume()
and ->probe() or ->release(), potentially saving a whole lot of headaches.

It also moves us a step closer to removing the bus rwsem, since it protects the fields
in struct device that are modified by the core.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 68a95358013d..195b327f3e19 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -264,6 +264,10 @@ struct device {
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
 
+	struct semaphore	sem;	/* semaphore to synchronize calls to
+					 * its driver.
+					 */
+
 	struct bus_type	* bus;		/* type of bus device is on */
 	struct device_driver *driver;	/* which driver has allocated this
 					   device */
-- 
cgit v1.2.3


From fae3cd00255e3e51ffd59fedb1bdb91ec96be395 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 10:59:56 -0800
Subject: [PATCH] Add driver_for_each_device().

Now there's an iterator for accessing each device bound to a driver.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Index: linux-2.6.12-rc2/drivers/base/driver.c
===================================================================
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 195b327f3e19..10f5aa20e041 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -136,6 +136,9 @@ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store)
 extern int driver_create_file(struct device_driver *, struct driver_attribute *);
 extern void driver_remove_file(struct device_driver *, struct driver_attribute *);
 
+extern int driver_for_each_device(struct device_driver * drv, struct device * start,
+				  void * data, int (*fn)(struct device *, void *));
+
 
 /*
  * device classes
-- 
cgit v1.2.3


From 9a19fea43616066561e221359596ce532e631395 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 11:45:16 -0800
Subject: [PATCH] Add initial implementation of klist helpers.

This klist interface provides a couple of structures that wrap around
struct list_head to provide explicit list "head" (struct klist) and
list "node" (struct klist_node) objects. For struct klist, a spinlock
is included that protects access to the actual list itself. struct
klist_node provides a pointer to the klist that owns it and a kref
reference count that indicates the number of current users of that node
in the list.

The entire point is to provide an interface for iterating over a list
that is safe and allows for modification of the list during the
iteration (e.g. insertion and removal), including modification of the
current node on the list.

It works using a 3rd object type - struct klist_iter - that is declared
and initialized before an iteration. klist_next() is used to acquire the
next element in the list. It returns NULL if there are no more items.
This klist interface provides a couple of structures that wrap around
struct list_head to provide explicit list "head" (struct klist) and
list "node" (struct klist_node) objects. For struct klist, a spinlock
is included that protects access to the actual list itself. struct
klist_node provides a pointer to the klist that owns it and a kref
reference count that indicates the number of current users of that node
in the list.

The entire point is to provide an interface for iterating over a list
that is safe and allows for modification of the list during the
iteration (e.g. insertion and removal), including modification of the
current node on the list.

It works using a 3rd object type - struct klist_iter - that is declared
and initialized before an iteration. klist_next() is used to acquire the
next element in the list. It returns NULL if there are no more items.
Internally, that routine takes the klist's lock, decrements the reference
count of the previous klist_node and increments the count of the next
klist_node. It then drops the lock and returns.

There are primitives for adding and removing nodes to/from a klist.
When deleting, klist_del() will simply decrement the reference count.
Only when the count goes to 0 is the node removed from the list.
klist_remove() will try to delete the node from the list and block
until it is actually removed. This is useful for objects (like devices)
that have been removed from the system and must be freed (but must wait
until all accessors have finished).

Internally, that routine takes the klist's lock, decrements the reference
count of the previous klist_node and increments the count of the next
klist_node. It then drops the lock and returns.

There are primitives for adding and removing nodes to/from a klist.
When deleting, klist_del() will simply decrement the reference count.
Only when the count goes to 0 is the node removed from the list.
klist_remove() will try to delete the node from the list and block
until it is actually removed. This is useful for objects (like devices)
that have been removed from the system and must be freed (but must wait
until all accessors have finished).

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -Nru a/include/linux/klist.h b/include/linux/klist.h
---
 include/linux/klist.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 include/linux/klist.h

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
new file mode 100644
index 000000000000..fb52f9d9d611
--- /dev/null
+++ b/include/linux/klist.h
@@ -0,0 +1,53 @@
+/*
+ *	klist.h - Some generic list helpers, extending struct list_head a bit.
+ *
+ *	Implementations are found in lib/klist.c
+ *
+ *
+ *	Copyright (C) 2005 Patrick Mochel
+ *
+ *	This file is rleased under the GPL v2.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+
+
+struct klist {
+	spinlock_t		k_lock;
+	struct list_head	k_list;
+};
+
+
+extern void klist_init(struct klist * k);
+
+
+struct klist_node {
+	struct klist		* n_klist;
+	struct list_head	n_node;
+	struct kref		n_ref;
+	struct completion	n_removed;
+};
+
+extern void klist_add_tail(struct klist * k, struct klist_node * n);
+extern void klist_add_head(struct klist * k, struct klist_node * n);
+
+extern void klist_del(struct klist_node * n);
+extern void klist_remove(struct klist_node * n);
+
+
+struct klist_iter {
+	struct klist		* i_klist;
+	struct list_head	* i_head;
+	struct klist_node	* i_cur;
+};
+
+
+extern void klist_iter_init(struct klist * k, struct klist_iter * i);
+extern void klist_iter_init_node(struct klist * k, struct klist_iter * i, 
+				 struct klist_node * n);
+extern void klist_iter_exit(struct klist_iter * i);
+extern struct klist_node * klist_next(struct klist_iter * i);
+
-- 
cgit v1.2.3


From 465c7a3a3a5aabcedd2e43612cac5a12f23da19a Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 11:49:14 -0800
Subject: [PATCH] Add a klist to struct bus_type for its devices.

- Use it for bus_for_each_dev().
- Use the klist spinlock instead of the bus rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 10f5aa20e041..e36953cf7f14 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -14,6 +14,7 @@
 #include <linux/config.h>
 #include <linux/ioport.h>
 #include <linux/kobject.h>
+#include <linux/klist.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/module.h>
@@ -51,6 +52,7 @@ struct bus_type {
 	struct subsystem	subsys;
 	struct kset		drivers;
 	struct kset		devices;
+	struct klist		klist_devices;
 
 	struct bus_attribute	* bus_attrs;
 	struct device_attribute	* dev_attrs;
@@ -262,6 +264,7 @@ struct device {
 	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
+	struct klist_node	knode_bus;
 	struct device 	* parent;
 
 	struct kobject kobj;
-- 
cgit v1.2.3


From 38fdac3cdce276554b4484a41f8ec2daf81cb2ff Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 12:00:18 -0800
Subject: [PATCH] Add a klist to struct bus_type for its drivers.

- Use it in bus_for_each_drv().
- Use the klist spinlock instead of the bus rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index e36953cf7f14..ea9ab33dfe71 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -53,6 +53,7 @@ struct bus_type {
 	struct kset		drivers;
 	struct kset		devices;
 	struct klist		klist_devices;
+	struct klist		klist_drivers;
 
 	struct bus_attribute	* bus_attrs;
 	struct device_attribute	* dev_attrs;
@@ -105,6 +106,7 @@ struct device_driver {
 	struct completion	unloaded;
 	struct kobject		kobj;
 	struct list_head	devices;
+	struct klist_node	knode_bus;
 
 	struct module		* owner;
 
-- 
cgit v1.2.3


From 94e7b1c5ff2055571703e38b059afffe17658432 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 12:25:36 -0800
Subject: [PATCH] Add a klist to struct device_driver for the devices bound to
 it.

- Use it in driver_for_each_device() instead of the regular list_head and stop using
  the bus's rwsem for protection.
- Use driver_for_each_device() in driver_detach() so we don't deadlock on the
  bus's rwsem.
- Remove ->devices.
- Move klist access and sysfs link access out from under device's semaphore, since
  they're synchronized through other means.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index ea9ab33dfe71..96c71b59fdef 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -105,7 +105,7 @@ struct device_driver {
 
 	struct completion	unloaded;
 	struct kobject		kobj;
-	struct list_head	devices;
+	struct klist		klist_devices;
 	struct klist_node	knode_bus;
 
 	struct module		* owner;
@@ -266,6 +266,7 @@ struct device {
 	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
+	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device 	* parent;
 
-- 
cgit v1.2.3


From cb85b6f1cc811ecb9ed4b950206d8941ba710e68 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 10:48:35 -0800
Subject: [PATCH] Remove the unused device_find().

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 96c71b59fdef..f1c38d869ac9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -372,7 +372,6 @@ extern int (*platform_notify_remove)(struct device * dev);
  */
 extern struct device * get_device(struct device * dev);
 extern void put_device(struct device * dev);
-extern struct device *device_find(const char *name, struct bus_type *bus);
 
 
 /* drivers/base/platform.c */
-- 
cgit v1.2.3


From 8b0c250be489dcbf1a3a33bb4ec4c7f33735a365 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 12:58:57 -0800
Subject: [PATCH] add klist_node_attached() to determine if a node is on a list
 or not.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -Nru a/include/linux/klist.h b/include/linux/klist.h
---
 include/linux/klist.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index fb52f9d9d611..eebf5e5696ec 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -37,6 +37,8 @@ extern void klist_add_head(struct klist * k, struct klist_node * n);
 extern void klist_del(struct klist_node * n);
 extern void klist_remove(struct klist_node * n);
 
+extern int klist_node_attached(struct klist_node * n);
+
 
 struct klist_iter {
 	struct klist		* i_klist;
-- 
cgit v1.2.3


From 7dc35cdf69149a7f2b5216ada9bafe359746ac1c Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 13:03:35 -0800
Subject: [PATCH] Remove struct device::bus_list.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index f1c38d869ac9..13f65853e583 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
 	struct klist_node	knode_driver;
-- 
cgit v1.2.3


From 63c4f204ffc8219696bda88eb20c9873d007a2fc Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 13:08:05 -0800
Subject: [PATCH] Remove struct device::driver_list.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 13f65853e583..d2434934d091 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-	struct list_head driver_list;
 	struct list_head children;
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
-- 
cgit v1.2.3


From 36239577cfb6b9a7c111209536b54200b0252ebf Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 19:08:30 -0800
Subject: [PATCH] Use a klist for device child lists.

- Use klist iterator in device_for_each_child(), making it safe to use for
  removing devices.
- Remove unused list_to_dev() function.
- Kills all usage of devices_subsys.rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index d2434934d091..43249260cd1c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -262,8 +262,8 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 
 struct device {
-	struct list_head node;		/* node in sibling list */
-	struct list_head children;
+	struct klist		klist_children;
+	struct klist_node	knode_parent;		/* node in sibling list */
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device 	* parent;
@@ -298,12 +298,6 @@ struct device {
 	void	(*release)(struct device * dev);
 };
 
-static inline struct device *
-list_to_dev(struct list_head *node)
-{
-	return list_entry(node, struct device, node);
-}
-
 static inline void *
 dev_get_drvdata (struct device *dev)
 {
-- 
cgit v1.2.3


From 0d3e5a2e39b6ba2974e9e7c2a429018c45de8e76 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@digitalimplant.org>
Date: Tue, 5 Apr 2005 23:46:33 -0700
Subject: [PATCH] Driver Core: fix bk-driver-core kills ppc64

There's no check to see if the device is already bound to a driver, which
could do bad things.  The first thing to go wrong is that it will try to match
a driver with a device already bound to one.  In some cases (it appears with
USB with drivers/usb/core/usb.c::usb_match_id()), some drivers will match a
device based on the class type, so it would be common (especially for HID
devices) to match a device that is already bound.

The fun comes when ->probe() is called, it fails, then
driver_probe_device() does this:

	dev->driver = NULL;

Later on, that pointer could be be dereferenced without checking and cause
hell to break loose.

This problem could be nasty. It's very hardware dependent, since some
devices could have a different set of matching qualifiers than others.

Now, I don't quite see exactly where/how you were getting that crash.
You're dereferencing bad memory, but I'm not sure which pointer was bad
and where it came from, but it could have come from a couple of different
places.

The patch below will hopefully fix it all up for you. It's against
2.6.12-rc2-mm1, and does the following:

- Move logic to driver_probe_device() and comments uncommon returns:
  1 - If device is bound
  0 - If device not bound, and no error
  error - If there was an error.

- Move locking to caller of that function, since we want to lock a
  device for the entire time we're trying to bind it to a driver (to
  prevent against a driver being loaded at the same time).

- Update __device_attach() and __driver_attach() to do that locking.

- Check if device is already bound in __driver_attach()

- Update the converse device_release_driver() so it locks the device
  around all of the operations.

- Mark driver_probe_device() as static and remove export. It's an
  internal function, it should stay that way, and there are no other
  callers. If there is ever a need to export it, we can audit it as
  necessary.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 43249260cd1c..91aac349b9a7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -325,7 +325,6 @@ extern int device_for_each_child(struct device *, void *,
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
  */
-extern int  driver_probe_device(struct device_driver * drv, struct device * dev);
 extern void device_bind_driver(struct device * dev);
 extern void device_release_driver(struct device * dev);
 extern int  device_attach(struct device * dev);
-- 
cgit v1.2.3


From 4b45099b75832434c5113b9aed1499f8a69d13d5 Mon Sep 17 00:00:00 2001
From: Keiichiro Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Date: Sun, 8 May 2005 21:28:53 +0900
Subject: [PATCH] Driver core: unregister_node() for hotplug use

This adds a generic function 'unregister_node()'.
It is used to remove objects of a node going away
for hotplug.  All the devices on the node must be
unregistered before calling this function.

Signed-off-by: Keiichiro Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -puN drivers/base/node.c~numa_hp_base drivers/base/node.c
---
 include/linux/node.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/node.h b/include/linux/node.h
index 6e0a697e594e..254dc3de650b 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -27,6 +27,7 @@ struct node {
 };
 
 extern int register_node(struct node *, int, struct node *);
+extern void unregister_node(struct node *node);
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
 
-- 
cgit v1.2.3


From acaefc25d21f850e47ecc5098d1e0bc442c526be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 May 2005 14:40:59 +0200
Subject: [PATCH] libfs: add simple attribute files

Based on the discussion about spufs attributes, this is my suggestion
for a more generic attribute file support that can be used by both
debugfs and spufs.

Simple attribute files behave similarly to sequential files from
a kernel programmers perspective in that a standard set of file
operations is provided and only an open operation needs to
be written that registers file specific get() and set() functions.

These operations are defined as

void foo_set(void *data, u64 val); and
u64 foo_get(void *data);

where data is the inode->u.generic_ip pointer of the file and the
operations just need to make send of that pointer. The infrastructure
makes sure this works correctly with concurrent access and partial
read calls.

A macro named DEFINE_SIMPLE_ATTRIBUTE is provided to further simplify
using the attributes.

This patch already contains the changes for debugfs to use attributes
for its internal file operations.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/fs.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0180102dace1..9b8b696d4f15 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1657,6 +1657,52 @@ static inline void simple_transaction_set(struct file *file, size_t n)
 	ar->size = n;
 }
 
+/*
+ * simple attribute files
+ *
+ * These attributes behave similar to those in sysfs:
+ *
+ * Writing to an attribute immediately sets a value, an open file can be
+ * written to multiple times.
+ *
+ * Reading from an attribute creates a buffer from the value that might get
+ * read with multiple read calls. When the attribute has been read
+ * completely, no further read calls are possible until the file is opened
+ * again.
+ *
+ * All attributes contain a text representation of a numeric value
+ * that are accessed with the get() and set() functions.
+ */
+#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return simple_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static struct file_operations __fops = {				\
+	.owner	 = THIS_MODULE,						\
+	.open	 = __fops ## _open,					\
+	.release = simple_attr_close,					\
+	.read	 = simple_attr_read,					\
+	.write	 = simple_attr_write,					\
+};
+
+static inline void __attribute__((format(printf, 1, 2)))
+__simple_attr_check_format(const char *fmt, ...)
+{
+	/* don't do anything, just let the compiler check the arguments; */
+}
+
+int simple_attr_open(struct inode *inode, struct file *file,
+		     u64 (*get)(void *), void (*set)(void *, u64),
+		     const char *fmt);
+int simple_attr_close(struct inode *inode, struct file *file);
+ssize_t simple_attr_read(struct file *file, char __user *buf,
+			 size_t len, loff_t *ppos);
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+			  size_t len, loff_t *ppos);
+
+
 #ifdef CONFIG_SECURITY
 static inline char *alloc_secdata(void)
 {
-- 
cgit v1.2.3


From 54b6f35c99974e99e64c05c2895718355123c55f Mon Sep 17 00:00:00 2001
From: Yani Ioannou <yani.ioannou@gmail.com>
Date: Tue, 17 May 2005 06:39:34 -0400
Subject: [PATCH] Driver core: change device_attribute callbacks

This patch adds the device_attribute paramerter to the
device_attribute store and show sysfs callback functions, and passes a
reference to the attribute when the callbacks are called.

Signed-off-by: Yani Ioannou <yani.ioannou@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 91aac349b9a7..7b781a72b293 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -335,8 +335,10 @@ extern void driver_attach(struct device_driver * drv);
 
 struct device_attribute {
 	struct attribute	attr;
-	ssize_t (*show)(struct device * dev, char * buf);
-	ssize_t (*store)(struct device * dev, const char * buf, size_t count);
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
 };
 
 #define DEVICE_ATTR(_name,_mode,_show,_store) \
-- 
cgit v1.2.3


From 0a3e7eeabc9f76b7496488aad2d11626ff6a2a4f Mon Sep 17 00:00:00 2001
From: Yani Ioannou <yani.ioannou@gmail.com>
Date: Tue, 17 May 2005 22:59:05 -0400
Subject: [PATCH] I2C: add i2c sensor_device_attribute and macros

This patch creates a new header with a potential standard i2c sensor
attribute type (which simply includes an int representing the sensor
number/index) and the associated macros, SENSOR_DEVICE_ATTR to define
a static attribute and to_sensor_dev_attr to get a
sensor_device_attribute reference from an embedded device_attribute
reference.

Signed-off-by: Yani Ioannou <yani.ioannou@gmail.com>
---
 include/linux/i2c-sysfs.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 include/linux/i2c-sysfs.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-sysfs.h b/include/linux/i2c-sysfs.h
new file mode 100644
index 000000000000..d7bf6ce11679
--- /dev/null
+++ b/include/linux/i2c-sysfs.h
@@ -0,0 +1,36 @@
+/*
+ *  i2c-sysfs.h - i2c chip driver sysfs defines
+ *
+ *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_I2C_SYSFS_H
+#define _LINUX_I2C_SYSFS_H
+
+struct sensor_device_attribute{
+	struct device_attribute dev_attr;
+	int index;
+};
+#define to_sensor_dev_attr(_dev_attr) \
+	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
+
+#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
+struct sensor_device_attribute sensor_dev_attr_##_name = {	\
+	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
+	.index =	_index,					\
+}
+
+#endif /* _LINUX_I2C_SYSFS_H */
-- 
cgit v1.2.3


From 988d186de5b6966a71a8cc52e6cb4895fd2f7799 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Tue, 31 May 2005 10:39:14 +0530
Subject: [PATCH] sysfs-iattr: add sysfs_setattr

o This adds ->i_op->setattr VFS method for sysfs inodes. The changed
  attribues are saved in the persistent sysfs_dirent structure as a pointer
  to struct iattr. The struct iattr is allocated only for those sysfs_dirent's
  for which default attributes are getting changed. Thanks to Jon Smirl for
  this suggestion.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d9cd2d31d377..392da5a6dacb 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -73,6 +73,7 @@ struct sysfs_dirent {
 	int			s_type;
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
+	struct iattr		* s_iattr;
 };
 
 #define SYSFS_ROOT		0x0001
-- 
cgit v1.2.3


From 18b504e25fd617bee8830d2cdcaff7fb7b5931bb Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Tue, 21 Jun 2005 12:38:48 -0700
Subject: [NETLINK]: netlink_callback structure needs 5 args not 4

net/ipv4/tcp_diag.c uses up to ->args[4]

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 561d4dc75836..3029cad63a01 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -147,7 +147,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[4];
+	long		args[5];
 };
 
 struct netlink_notify
-- 
cgit v1.2.3


From e45b1be8bcb3643808975a426fa3e201a2588e87 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Jun 2005 14:01:30 -0700
Subject: [NETFILTER]: Kill lockhelp.h

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack_core.h |   3 +-
 include/linux/netfilter_ipv4/ip_nat.h            |   3 +-
 include/linux/netfilter_ipv4/listhelp.h          |   1 -
 include/linux/netfilter_ipv4/lockhelp.h          | 129 -----------------------
 4 files changed, 2 insertions(+), 134 deletions(-)
 delete mode 100644 include/linux/netfilter_ipv4/lockhelp.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index d84be02cb4fc..694aec9b4784 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -1,7 +1,6 @@
 #ifndef _IP_CONNTRACK_CORE_H
 #define _IP_CONNTRACK_CORE_H
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
@@ -47,6 +46,6 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
 
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
-DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
+extern rwlock_t ip_conntrack_lock;
 #endif /* _IP_CONNTRACK_CORE_H */
 
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index 2b72b86176f0..e201ec6e9905 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -50,10 +50,9 @@ struct ip_nat_multi_range_compat
 
 #ifdef __KERNEL__
 #include <linux/list.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* Protects NAT hash tables, and NAT-private part of conntracks. */
-DECLARE_RWLOCK_EXTERN(ip_nat_lock);
+extern rwlock_t ip_nat_lock;
 
 /* The structure embedded in the conntrack structure. */
 struct ip_nat_info
diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h
index f2ae7c5e57bb..360429f48737 100644
--- a/include/linux/netfilter_ipv4/listhelp.h
+++ b/include/linux/netfilter_ipv4/listhelp.h
@@ -2,7 +2,6 @@
 #define _LISTHELP_H
 #include <linux/config.h>
 #include <linux/list.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* Header to do more comprehensive job than linux/list.h; assume list
    is first entry in structure. */
diff --git a/include/linux/netfilter_ipv4/lockhelp.h b/include/linux/netfilter_ipv4/lockhelp.h
deleted file mode 100644
index a3288633ab46..000000000000
--- a/include/linux/netfilter_ipv4/lockhelp.h
+++ /dev/null
@@ -1,129 +0,0 @@
-#ifndef _LOCKHELP_H
-#define _LOCKHELP_H
-#include <linux/config.h>
-
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-/* Header to do help in lock debugging. */
-
-#ifdef CONFIG_NETFILTER_DEBUG
-struct spinlock_debug
-{
-	spinlock_t l;
-	atomic_t locked_by;
-};
-
-struct rwlock_debug
-{
-	rwlock_t l;
-	long read_locked_map;
-	long write_locked_map;
-};
-
-#define DECLARE_LOCK(l) 						\
-struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) }
-#define DECLARE_LOCK_EXTERN(l) 			\
-extern struct spinlock_debug l
-#define DECLARE_RWLOCK(l)				\
-struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 }
-#define DECLARE_RWLOCK_EXTERN(l)		\
-extern struct rwlock_debug l
-
-#define MUST_BE_LOCKED(l)						\
-do { if (atomic_read(&(l)->locked_by) != smp_processor_id())		\
-	printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l);	\
-} while(0)
-
-#define MUST_BE_UNLOCKED(l)						\
-do { if (atomic_read(&(l)->locked_by) == smp_processor_id())		\
-	printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l);	\
-} while(0)
-
-/* Write locked OK as well. */
-#define MUST_BE_READ_LOCKED(l)						    \
-do { if (!((l)->read_locked_map & (1UL << smp_processor_id()))		    \
-	 && !((l)->write_locked_map & (1UL << smp_processor_id())))	    \
-	printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define MUST_BE_WRITE_LOCKED(l)						     \
-do { if (!((l)->write_locked_map & (1UL << smp_processor_id())))	     \
-	printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define MUST_BE_READ_WRITE_UNLOCKED(l)					  \
-do { if ((l)->read_locked_map & (1UL << smp_processor_id()))		  \
-	printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l);	  \
- else if ((l)->write_locked_map & (1UL << smp_processor_id()))		  \
-	 printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define LOCK_BH(lk)						\
-do {								\
-	MUST_BE_UNLOCKED(lk);					\
-	spin_lock_bh(&(lk)->l);					\
-	atomic_set(&(lk)->locked_by, smp_processor_id());	\
-} while(0)
-
-#define UNLOCK_BH(lk)				\
-do {						\
-	MUST_BE_LOCKED(lk);			\
-	atomic_set(&(lk)->locked_by, -1);	\
-	spin_unlock_bh(&(lk)->l);		\
-} while(0)
-
-#define READ_LOCK(lk) 						\
-do {								\
-	MUST_BE_READ_WRITE_UNLOCKED(lk);			\
-	read_lock_bh(&(lk)->l);					\
-	set_bit(smp_processor_id(), &(lk)->read_locked_map);	\
-} while(0)
-
-#define WRITE_LOCK(lk)							  \
-do {									  \
-	MUST_BE_READ_WRITE_UNLOCKED(lk);				  \
-	write_lock_bh(&(lk)->l);					  \
-	set_bit(smp_processor_id(), &(lk)->write_locked_map);		  \
-} while(0)
-
-#define READ_UNLOCK(lk)							\
-do {									\
-	if (!((lk)->read_locked_map & (1UL << smp_processor_id())))	\
-		printk("ASSERT: %s:%u %s not readlocked\n", 		\
-		       __FILE__, __LINE__, #lk);			\
-	clear_bit(smp_processor_id(), &(lk)->read_locked_map);		\
-	read_unlock_bh(&(lk)->l);					\
-} while(0)
-
-#define WRITE_UNLOCK(lk)					\
-do {								\
-	MUST_BE_WRITE_LOCKED(lk);				\
-	clear_bit(smp_processor_id(), &(lk)->write_locked_map);	\
-	write_unlock_bh(&(lk)->l);				\
-} while(0)
-
-#else
-#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
-#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l
-#define DECLARE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
-#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l
-
-#define MUST_BE_LOCKED(l)
-#define MUST_BE_UNLOCKED(l)
-#define MUST_BE_READ_LOCKED(l)
-#define MUST_BE_WRITE_LOCKED(l)
-#define MUST_BE_READ_WRITE_UNLOCKED(l)
-
-#define LOCK_BH(l) spin_lock_bh(l)
-#define UNLOCK_BH(l) spin_unlock_bh(l)
-
-#define READ_LOCK(l) read_lock_bh(l)
-#define WRITE_LOCK(l) write_lock_bh(l)
-#define READ_UNLOCK(l) read_unlock_bh(l)
-#define WRITE_UNLOCK(l) write_unlock_bh(l)
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
-#endif /* _LOCKHELP_H */
-- 
cgit v1.2.3


From 18b8afc771102b1b6af97962808291a7d27f52af Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Jun 2005 14:01:57 -0700
Subject: [NETFILTER]: Kill nf_debug

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h |  6 ------
 include/linux/skbuff.h         | 13 -------------
 2 files changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 9e5750079e09..3ebc36afae1a 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -75,12 +75,6 @@ enum nf_ip_hook_priorities {
 #define SO_ORIGINAL_DST 80
 
 #ifdef __KERNEL__
-#ifdef CONFIG_NETFILTER_DEBUG
-void nf_debug_ip_local_deliver(struct sk_buff *skb);
-void nf_debug_ip_loopback_xmit(struct sk_buff *newskb);
-void nf_debug_ip_finish_output2(struct sk_buff *skb);
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
 extern int ip_route_me_harder(struct sk_buff **pskb);
 
 /* Call this before modifying an existing IP packet: ensures it is
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc04f5cd2286..d7c839a21842 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -193,7 +193,6 @@ struct skb_shared_info {
  *	@nfcache: Cache info
  *	@nfct: Associated connection, if any
  *	@nfctinfo: Relationship of this skb to the connection
- *	@nf_debug: Netfilter debugging
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *      @private: Data which is private to the HIPPI implementation
  *	@tc_index: Traffic control index
@@ -264,9 +263,6 @@ struct sk_buff {
 	__u32			nfcache;
 	__u32			nfctinfo;
 	struct nf_conntrack	*nfct;
-#ifdef CONFIG_NETFILTER_DEBUG
-        unsigned int		nf_debug;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ -1219,15 +1215,6 @@ static inline void nf_reset(struct sk_buff *skb)
 {
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
-}
-static inline void nf_reset_debug(struct sk_buff *skb)
-{
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
 }
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-- 
cgit v1.2.3


From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 21 Jun 2005 17:14:34 -0700
Subject: [PATCH] smp_processor_id() cleanup

This patch implements a number of smp_processor_id() cleanup ideas that
Arjan van de Ven and I came up with.

The previous __smp_processor_id/_smp_processor_id/smp_processor_id API
spaghetti was hard to follow both on the implementational and on the
usage side.

Some of the complexity arose from picking wrong names, some of the
complexity comes from the fact that not all architectures defined
__smp_processor_id.

In the new code, there are two externally visible symbols:

 - smp_processor_id(): debug variant.

 - raw_smp_processor_id(): nondebug variant. Replaces all existing
   uses of _smp_processor_id() and __smp_processor_id(). Defined
   by every SMP architecture in include/asm-*/smp.h.

There is one new internal symbol, dependent on DEBUG_PREEMPT:

 - debug_smp_processor_id(): internal debug variant, mapped to
                             smp_processor_id().

Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new
lib/smp_processor_id.c file.  All related comments got updated and/or
clarified.

I have build/boot tested the following 8 .config combinations on x86:

 {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT}

I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT.  (Other
architectures are untested, but should work just fine.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h |  2 +-
 include/linux/smp.h    | 40 ++++++++++++++++------------------------
 2 files changed, 17 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e530c6c092f1..beacd931b606 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -381,7 +381,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
-#define numa_node_id()		(cpu_to_node(_smp_processor_id()))
+#define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
 
 #ifndef CONFIG_DISCONTIGMEM
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index dcf1db3b35d3..9dfa3ee769ae 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -92,10 +92,7 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
-
-#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
-# define smp_processor_id()			0
-#endif
+#define raw_smp_processor_id()			0
 #define hard_smp_processor_id()			0
 #define smp_call_function(func,info,retry,wait)	({ 0; })
 #define on_each_cpu(func,info,retry,wait)	({ func(info); 0; })
@@ -106,30 +103,25 @@ static inline void smp_send_reschedule(int cpu) { }
 #endif /* !SMP */
 
 /*
- * DEBUG_PREEMPT support: check whether smp_processor_id() is being
- * used in a preemption-safe way.
+ * smp_processor_id(): get the current CPU ID.
  *
- * An architecture has to enable this debugging code explicitly.
- * It can do so by renaming the smp_processor_id() macro to
- * __smp_processor_id().  This should only be done after some minimal
- * testing, because usually there are a number of false positives
- * that an architecture will trigger.
+ * if DEBUG_PREEMPT is enabled the we check whether it is
+ * used in a preemption-safe way. (smp_processor_id() is safe
+ * if it's used in a preemption-off critical section, or in
+ * a thread that is bound to the current CPU.)
  *
- * To fix a false positive (i.e. smp_processor_id() use that the
- * debugging code reports but which use for some reason is legal),
- * change the smp_processor_id() reference to _smp_processor_id(),
- * which is the nondebug variant.  NOTE: don't use this to hack around
- * real bugs.
+ * NOTE: raw_smp_processor_id() is for internal use only
+ * (smp_processor_id() is the preferred variant), but in rare
+ * instances it might also be used to turn off false positives
+ * (i.e. smp_processor_id() use that the debugging code reports but
+ * which use for some reason is legal). Don't use this to hack around
+ * the warning message, as your code might not work under PREEMPT.
  */
-#ifdef __smp_processor_id
-# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
-   extern unsigned int smp_processor_id(void);
-# else
-#  define smp_processor_id() __smp_processor_id()
-# endif
-# define _smp_processor_id() __smp_processor_id()
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
 #else
-# define _smp_processor_id() smp_processor_id()
+# define smp_processor_id() raw_smp_processor_id()
 #endif
 
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
-- 
cgit v1.2.3


From 753ee728964e5afb80c17659cc6c3a6fd0a42fe0 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:41 -0700
Subject: [PATCH] VM: early zone reclaim

This is the core of the (much simplified) early reclaim.  The goal of this
patch is to reclaim some easily-freed pages from a zone before falling back
onto another zone.

One of the major uses of this is NUMA machines.  With the default allocator
behavior the allocator would look for memory in another zone, which might be
off-node, before trying to reclaim from the current zone.

This adds a zone tuneable to enable early zone reclaim.  It is selected on a
per-zone basis and is turned on/off via syscall.

Adding some extra throttling on the reclaim was also required (patch
4/4).  Without the machine would grind to a crawl when doing a "make -j"
kernel build.  Even with this patch the System Time is higher on
average, but it seems tolerable.  Here are some numbers for kernbench
runs on a 2-node, 4cpu, 8Gig RAM Altix in the "make -j" run:

			wall  user   sys   %cpu  ctx sw.  sleeps
			----  ----   ---   ----   ------  ------
No patch		1009  1384   847   258   298170   504402
w/patch, no reclaim     880   1376   667   288   254064   396745
w/patch & reclaim       1079  1385   926   252   291625   548873

These numbers are the average of 2 runs of 3 "make -j" runs done right
after system boot.  Run-to-run variability for "make -j" is huge, so
these numbers aren't terribly useful except to seee that with reclaim
the benchmark still finishes in a reasonable amount of time.

I also looked at the NUMA hit/miss stats for the "make -j" runs and the
reclaim doesn't make any difference when the machine is thrashing away.

Doing a "make -j8" on a single node that is filled with page cache pages
takes 700 seconds with reclaim turned on and 735 seconds without reclaim
(due to remote memory accesses).

The simple zone_reclaim syscall program is at
http://www.bork.org/~mort/sgi/zone_reclaim.c

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 ++++++
 include/linux/swap.h   | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index beacd931b606..dfc2452ccb10 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -144,6 +144,12 @@ struct zone {
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
 
+	/*
+	 * Does the allocator try to reclaim pages from the zone as soon
+	 * as it fails a watermark_ok() in __alloc_pages?
+	 */
+	int			reclaim_pages;
+
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
 	 * defined as the scanning priority at which we achieved our reclaim
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3bbc41be9bd0..0d21e682d99d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -173,6 +173,7 @@ extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
+extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
-- 
cgit v1.2.3


From 0c35bbadc59f5ed105c34471143eceb4c0dd9c95 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:42 -0700
Subject: [PATCH] VM: add __GFP_NORECLAIM

When using the early zone reclaim, it was noticed that allocating new pages
that should be spread across the whole system caused eviction of local pages.

This adds a new GFP flag to prevent early reclaim from happening during
certain allocation attempts.  The example that is implemented here is for page
cache pages.  We want page cache pages to be spread across the whole system,
and we don't want page cache pages to evict other pages to get local memory.

Signed-off-by:  Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h     | 3 ++-
 include/linux/pagemap.h | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index af7407e8cfc5..208535fd4832 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,6 +39,7 @@ struct vm_area_struct;
 #define __GFP_COMP	0x4000u	/* Add compound page metadata */
 #define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
@@ -47,7 +48,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC)
+			__GFP_NOMEMALLOC|__GFP_NORECLAIM)
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0422031161ba..d9a25647a295 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -52,12 +52,12 @@ void release_pages(struct page **pages, int nr, int cold);
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_NORECLAIM, 0);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD|__GFP_NORECLAIM, 0);
 }
 
 typedef int filler_t(void *, struct page *);
-- 
cgit v1.2.3


From 1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:43 -0700
Subject: [PATCH] VM: rate limit early reclaim

When early zone reclaim is turned on the LRU is scanned more frequently when a
zone is low on memory.  This limits when the zone reclaim can be called by
skipping the scan if another thread (either via kswapd or sync reclaim) is
already reclaiming from the zone.

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dfc2452ccb10..18fed8b67943 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -149,6 +149,8 @@ struct zone {
 	 * as it fails a watermark_ok() in __alloc_pages?
 	 */
 	int			reclaim_pages;
+	/* A count of how many reclaimers are scanning this zone */
+	atomic_t		reclaim_in_progress;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
-- 
cgit v1.2.3


From 63551ae0feaaa23807ebea60de1901564bbef32e Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 21 Jun 2005 17:14:44 -0700
Subject: [PATCH] Hugepage consolidation

A lot of the code in arch/*/mm/hugetlbpage.c is quite similar.  This patch
attempts to consolidate a lot of the code across the arch's, putting the
combined version in mm/hugetlb.c.  There are a couple of uglyish hacks in
order to covert all the hugepage archs, but the result is a very large
reduction in the total amount of code.  It also means things like hugepage
lazy allocation could be implemented in one place, instead of six.

Tested, at least a little, on ppc64, i386 and x86_64.

Notes:
	- this patch changes the meaning of set_huge_pte() to be more
	  analagous to set_pte()
	- does SH4 need s special huge_ptep_get_and_clear()??

Acked-by: William Lee Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hugetlb.h | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6af1ae4a8211..f529d1442815 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -4,6 +4,7 @@
 #ifdef CONFIG_HUGETLB_PAGE
 
 #include <linux/mempolicy.h>
+#include <asm/tlbflush.h>
 
 struct ctl_table;
 
@@ -22,12 +23,6 @@ int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
 int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write);
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-				pmd_t *pmd, int write);
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
-int pmd_huge(pmd_t pmd);
 struct page *alloc_huge_page(void);
 void free_huge_page(struct page *);
 
@@ -35,6 +30,17 @@ extern unsigned long max_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
+/* arch callbacks */
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write);
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+				pmd_t *pmd, int write);
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
+int pmd_huge(pmd_t pmd);
+
 #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
@@ -48,6 +54,28 @@ extern int sysctl_hugetlb_shm_group;
 int prepare_hugepage_range(unsigned long addr, unsigned long len);
 #endif
 
+#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
+#define set_huge_pte_at(mm, addr, ptep, pte)	set_pte_at(mm, addr, ptep, pte)
+#define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep)
+#else
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep);
+#endif
+
+#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK
+#define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#else
+void hugetlb_prefault_arch_hook(struct mm_struct *mm);
+#endif
+
+#ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE
+#define hugetlb_clean_stale_pgtable(pte)	BUG()
+#else
+void hugetlb_clean_stale_pgtable(pte_t *pte);
+#endif
+
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
-- 
cgit v1.2.3


From e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Tue, 21 Jun 2005 17:14:47 -0700
Subject: [PATCH] node local per-cpu-pages

This patch modifies the way pagesets in struct zone are managed.

Each zone has a per-cpu array of pagesets.  So any particular CPU has some
memory in each zone structure which belongs to itself.  Even if that CPU is
not local to that zone.

So the patch relocates the pagesets for each cpu to the node that is nearest
to the cpu instead of allocating the pagesets in the (possibly remote) target
zone.  This means that the operations to manage pages on remote zone can be
done with information available locally.

We play a macro trick so that non-NUMA pmachines avoid the additional
pointer chase on the page allocator fastpath.

AIM7 benchmark on a 32 CPU SGI Altix

w/o patches:
Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      484.68  100       484.6769     12.01      1.97   Fri Mar 25 11:01:42 2005
  100    27140.46   89       271.4046     21.44    148.71   Fri Mar 25 11:02:04 2005
  200    30792.02   82       153.9601     37.80    296.72   Fri Mar 25 11:02:42 2005
  300    32209.27   81       107.3642     54.21    451.34   Fri Mar 25 11:03:37 2005
  400    34962.83   78        87.4071     66.59    588.97   Fri Mar 25 11:04:44 2005
  500    31676.92   75        63.3538     91.87    742.71   Fri Mar 25 11:06:16 2005
  600    36032.69   73        60.0545     96.91    885.44   Fri Mar 25 11:07:54 2005
  700    35540.43   77        50.7720    114.63   1024.28   Fri Mar 25 11:09:49 2005
  800    33906.70   74        42.3834    137.32   1181.65   Fri Mar 25 11:12:06 2005
  900    34120.67   73        37.9119    153.51   1325.26   Fri Mar 25 11:14:41 2005
 1000    34802.37   74        34.8024    167.23   1465.26   Fri Mar 25 11:17:28 2005

with slab API changes and pageset patch:

Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      485.00  100       485.0000     12.00      1.96   Fri Mar 25 11:46:18 2005
  100    28000.96   89       280.0096     20.79    150.45   Fri Mar 25 11:46:39 2005
  200    32285.80   79       161.4290     36.05    293.37   Fri Mar 25 11:47:16 2005
  300    40424.15   84       134.7472     43.19    438.42   Fri Mar 25 11:47:59 2005
  400    39155.01   79        97.8875     59.46    590.05   Fri Mar 25 11:48:59 2005
  500    37881.25   82        75.7625     76.82    730.19   Fri Mar 25 11:50:16 2005
  600    39083.14   78        65.1386     89.35    872.79   Fri Mar 25 11:51:46 2005
  700    38627.83   77        55.1826    105.47   1022.46   Fri Mar 25 11:53:32 2005
  800    39631.94   78        49.5399    117.48   1169.94   Fri Mar 25 11:55:30 2005
  900    36903.70   79        41.0041    141.94   1310.78   Fri Mar 25 11:57:53 2005
 1000    36201.23   77        36.2012    160.77   1458.31   Fri Mar 25 12:00:34 2005

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Shobhit Dayal <shobhit@calsoftinc.com>
Signed-off-by: Shai Fultheim <Shai@Scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     |  6 ++++++
 include/linux/mmzone.h | 11 ++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17518fe0b311..1813b162b0a8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -691,6 +691,12 @@ extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 
+#ifdef CONFIG_NUMA
+extern void setup_per_cpu_pageset(void);
+#else
+static inline void setup_per_cpu_pageset(void) {}
+#endif
+
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 18fed8b67943..4733d35d8223 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -63,6 +63,12 @@ struct per_cpu_pageset {
 #endif
 } ____cacheline_aligned_in_smp;
 
+#ifdef CONFIG_NUMA
+#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
+#else
+#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
+#endif
+
 #define ZONE_DMA		0
 #define ZONE_NORMAL		1
 #define ZONE_HIGHMEM		2
@@ -122,8 +128,11 @@ struct zone {
 	 */
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
+#ifdef CONFIG_NUMA
+	struct per_cpu_pageset	*pageset[NR_CPUS];
+#else
 	struct per_cpu_pageset	pageset[NR_CPUS];
-
+#endif
 	/*
 	 * free areas of different sizes
 	 */
-- 
cgit v1.2.3


From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001
From: Wolfgang Wander <wwc@rentec.com>
Date: Tue, 21 Jun 2005 17:14:49 -0700
Subject: [PATCH] Avoiding mmap fragmentation

Ingo recently introduced a great speedup for allocating new mmaps using the
free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and
causes huge performance increases in thread creation.

The downside of this patch is that it does lead to fragmentation in the
mmap-ed areas (visible via /proc/self/maps), such that some applications
that work fine under 2.4 kernels quickly run out of memory on any 2.6
kernel.

The problem is twofold:

  1) the free_area_cache is used to continue a search for memory where
     the last search ended.  Before the change new areas were always
     searched from the base address on.

     So now new small areas are cluttering holes of all sizes
     throughout the whole mmap-able region whereas before small holes
     tended to close holes near the base leaving holes far from the base
     large and available for larger requests.

  2) the free_area_cache also is set to the location of the last
     munmap-ed area so in scenarios where we allocate e.g.  five regions of
     1K each, then free regions 4 2 3 in this order the next request for 1K
     will be placed in the position of the old region 3, whereas before we
     appended it to the still active region 1, placing it at the location
     of the old region 2.  Before we had 1 free region of 2K, now we only
     get two free regions of 1K -> fragmentation.

The patch addresses thes issues by introducing yet another cache descriptor
cached_hole_size that contains the largest known hole size below the
current free_area_cache.  If a new request comes in the size is compared
against the cached_hole_size and if the request can be filled with a hole
below free_area_cache the search is started from the base instead.

The results look promising: Whereas 2.6.12-rc4 fragments quickly and my
(earlier posted) leakme.c test program terminates after 50000+ iterations
with 96 distinct and fragmented maps in /proc/self/maps it performs nicely
(as expected) with thread creation, Ingo's test_str02 with 20000 threads
requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically
deleting all mentions of free_area_cache from the kernel and starting the
search for new memory always at the respective bases we observe: leakme
terminates successfully with 11 distinctive hardly fragmented areas in
/proc/self/maps but thread creating is gringdingly slow: 30+s(!) system
time for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) the appended patch works fine with leakme: it ends with
only 7 distinct areas in /proc/self/maps and also thread creation seems
sufficiently fast with 0.71s for 20000 threads.

Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4dbb109022f3..b58afd97a180 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -201,8 +201,8 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
-extern void arch_unmap_area(struct vm_area_struct *area);
-extern void arch_unmap_area_topdown(struct vm_area_struct *area);
+extern void arch_unmap_area(struct mm_struct *, unsigned long);
+extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
@@ -218,9 +218,10 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
-	void (*unmap_area) (struct vm_area_struct *area);
-	unsigned long mmap_base;		/* base of mmap area */
-	unsigned long free_area_cache;		/* first hole */
+	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+        unsigned long mmap_base;		/* base of mmap area */
+        unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
+	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
-- 
cgit v1.2.3


From cbe37d093707762fc0abb280781e6a82a9d8d568 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Tue, 21 Jun 2005 17:14:52 -0700
Subject: [PATCH] mm: remove PG_highmem

Remove PG_highmem, to save a page flag.  Use is_highmem() instead.  It'll
generate a little more code, but we don't use PageHigheMem() in many places.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 39ab8c6b5652..df313891db10 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -61,21 +61,20 @@
 #define PG_active		 6
 #define PG_slab			 7	/* slab debug (Suparna wants this) */
 
-#define PG_highmem		 8
-#define PG_checked		 9	/* kill me in 2.5.<early>. */
-#define PG_arch_1		10
-#define PG_reserved		11
-
-#define PG_private		12	/* Has something at ->private */
-#define PG_writeback		13	/* Page is under writeback */
-#define PG_nosave		14	/* Used for system suspend/resume */
-#define PG_compound		15	/* Part of a compound page */
-
-#define PG_swapcache		16	/* Swap page: swp_entry_t in private */
-#define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
-#define PG_reclaim		18	/* To be reclaimed asap */
-#define PG_nosave_free		19	/* Free, should not be written */
-#define PG_uncached		20	/* Page has been mapped as uncached */
+#define PG_checked		 8	/* kill me in 2.5.<early>. */
+#define PG_arch_1		 9
+#define PG_reserved		10
+#define PG_private		11	/* Has something at ->private */
+
+#define PG_writeback		12	/* Page is under writeback */
+#define PG_nosave		13	/* Used for system suspend/resume */
+#define PG_compound		14	/* Part of a compound page */
+#define PG_swapcache		15	/* Swap page: swp_entry_t in private */
+
+#define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
+#define PG_reclaim		17	/* To be reclaimed asap */
+#define PG_nosave_free		18	/* Free, should not be written */
+#define PG_uncached		19	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -215,7 +214,7 @@ extern void __mod_page_state(unsigned offset, unsigned long delta);
 #define TestSetPageSlab(page)	test_and_set_bit(PG_slab, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
-#define PageHighMem(page)	test_bit(PG_highmem, &(page)->flags)
+#define PageHighMem(page)	is_highmem(page_zone(page))
 #else
 #define PageHighMem(page)	0 /* needed to optimize away at compile time */
 #endif
-- 
cgit v1.2.3


From 1ad539b2bd89bf2e129123eb24d5bcc4484a35de Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Tue, 21 Jun 2005 17:14:53 -0700
Subject: [PATCH] vm: try_to_free_pages unused argument

try_to_free_pages accepts a third argument, order, but hasn't used it since
before 2.6.0.  The following patch removes the argument and updates all the
calls to try_to_free_pages.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0d21e682d99d..2343f999e6e1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -172,7 +172,7 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
+extern int try_to_free_pages(struct zone **, unsigned int);
 extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
-- 
cgit v1.2.3


From 83e5d8f7253cb7b14472385a6d57df1e9f848e8e Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 21 Jun 2005 17:14:54 -0700
Subject: [PATCH] __mod_page_state(): pass unsigned long instead of unsigned

By making the offset argument of __mod_page_state an unsigned long instead
of unsigned, we can avoid forcing the compiler to sign extend a usually
constant argument.  This saves 1 instruction on x86-64.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index df313891db10..f2ee9b2332e3 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -136,7 +136,7 @@ struct page_state {
 extern void get_page_state(struct page_state *ret);
 extern void get_full_page_state(struct page_state *ret);
 extern unsigned long __read_page_state(unsigned offset);
-extern void __mod_page_state(unsigned offset, unsigned long delta);
+extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
 	__read_page_state(offsetof(struct page_state, member))
-- 
cgit v1.2.3


From c2f29ea111e3344ed48257c2a142c3db514e1529 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 21 Jun 2005 17:14:55 -0700
Subject: [PATCH] __read_page_state(): pass unsigned long instead of unsigned

By making the offset argument of __read_page_state an unsigned long instead of
unsigned, we can avoid forcing the compiler to sign extend a usually constant
argument.  This saves 1 instruction on x86-64.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f2ee9b2332e3..f5a6695d4d21 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -135,7 +135,7 @@ struct page_state {
 
 extern void get_page_state(struct page_state *ret);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned offset);
+extern unsigned long __read_page_state(unsigned long offset);
 extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
-- 
cgit v1.2.3


From 4ae7c03943fca73f23bc0cdb938070f41b98101f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Tue, 21 Jun 2005 17:14:57 -0700
Subject: [PATCH] Periodically drain non local pagesets

The pageset array can potentially acquire a huge amount of memory on large
NUMA systems.  F.e.  on a system with 512 processors and 256 nodes there
will be 256*512 pagesets.  If each pageset only holds 5 pages then we are
talking about 655360 pages.With a 16K page size on IA64 this results in
potentially 10 Gigabytes of memory being trapped in pagesets.  The typical
cases are much less for smaller systems but there is still the potential of
memory being trapped in off node pagesets.  Off node memory may be rarely
used if local memory is available and so we may potentially have memory in
seldom used pagesets without this patch.

The slab allocator flushes its per cpu caches every 2 seconds.  The
following patch flushes the off node pageset caches in the same way by
tying into the slab flush.

The patch also changes /proc/zoneinfo to include the number of pages
currently in each pageset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 208535fd4832..8d6bf608b199 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page));
 #define free_page(addr) free_pages((addr),0)
 
 void page_alloc_init(void);
+#ifdef CONFIG_NUMA
+void drain_remote_pages(void);
+#else
+static inline void drain_remote_pages(void) { };
+#endif
 
 #endif /* __LINUX_GFP_H */
-- 
cgit v1.2.3


From f14f75b81187cdbe10cc53a521bf9fdf97b59f8c Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@wildopensource.com>
Date: Tue, 21 Jun 2005 17:15:02 -0700
Subject: [PATCH] ia64 uncached alloc

This patch contains the ia64 uncached page allocator and the generic
allocator (genalloc).  The uncached allocator was formerly part of the SN2
mspec driver but there are several other users of it so it has been split
off from the driver.

The generic allocator can be used by device driver to manage special memory
etc.  The generic allocator is based on the allocator from the sym53c8xx_2
driver.

Various users on ia64 needs uncached memory.  The SGI SN architecture requires
it for inter-partition communication between partitions within a large NUMA
cluster.  The specific user for this is the XPC code.  Another application is
large MPI style applications which use it for synchronization, on SN this can
be done using special 'fetchop' operations but it also benefits non SN
hardware which may use regular uncached memory for this purpose.  Performance
of doing this through uncached vs cached memory is pretty substantial.  This
is handled by the mspec driver which I will push out in a seperate patch.

Rather than creating a specific allocator for just uncached memory I came up
with genalloc which is a generic purpose allocator that can be used by device
drivers and other subsystems as they please.  For instance to handle onboard
device memory.  It was derived from the sym53c7xx_2 driver's allocator which
is also an example of a potential user (I am refraining from modifying sym2
right now as it seems to have been under fairly heavy development recently).

On ia64 memory has various properties within a granule, ie.  it isn't safe to
access memory as uncached within the same granule as currently has memory
accessed in cached mode.  The regular system therefore doesn't utilize memory
in the lower granules which is mixed in with device PAL code etc.  The
uncached driver walks the EFI memmap and pulls out the spill uncached pages
and sticks them into the uncached pool.  Only after these chunks have been
utilized, will it start converting regular cached memory into uncached memory.
Hence the reason for the EFI related code additions.

Signed-off-by: Jes Sorensen <jes@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/genalloc.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/linux/genalloc.h

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
new file mode 100644
index 000000000000..7fd0576a4454
--- /dev/null
+++ b/include/linux/genalloc.h
@@ -0,0 +1,40 @@
+/*
+ * Basic general purpose allocator for managing special purpose memory
+ * not managed by the regular kmalloc/kfree interface.
+ * Uses for this includes on-device special memory, uncached memory
+ * etc.
+ *
+ * This code is based on the buddy allocator found in the sym53c8xx_2
+ * driver, adapted for general purpose use.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/spinlock.h>
+
+#define ALLOC_MIN_SHIFT		5 /* 32 bytes minimum */
+/*
+ *  Link between free memory chunks of a given size.
+ */
+struct gen_pool_link {
+	struct gen_pool_link *next;
+};
+
+/*
+ *  Memory pool descriptor.
+ */
+struct gen_pool {
+	spinlock_t lock;
+	unsigned long (*get_new_chunk)(struct gen_pool *);
+	struct gen_pool *next;
+	struct gen_pool_link *h;
+	unsigned long private;
+	int max_chunk_shift;
+};
+
+unsigned long gen_pool_alloc(struct gen_pool *poolp, int size);
+void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size);
+struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift,
+				 unsigned long (*fp)(struct gen_pool *),
+				 unsigned long data);
-- 
cgit v1.2.3


From 5b37b700f7c491a9320f4e29472bbaf23dded8fd Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@freescale.com>
Date: Tue, 21 Jun 2005 17:15:18 -0700
Subject: [PATCH] ppc32: Added support for new MPC8548 family of PowerQUICC III
 processors

Added descriptions of the new MPC8548 family processors, e500 core and
peripherals.

Signed-off-by: Kumar Gala <kumar.gala@freescale.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fsl_devices.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index faaff4c64559..70f54af87b9f 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -51,6 +51,7 @@ struct gianfar_platform_data {
 
 	/* board specific information */
 	u32 board_flags;
+	u32 phy_flags;
 	u32 phyid;
 	u32 interruptPHY;
 	u8 mac_addr[6];
@@ -61,9 +62,14 @@ struct gianfar_platform_data {
 #define FSL_GIANFAR_DEV_HAS_COALESCE		0x00000002
 #define FSL_GIANFAR_DEV_HAS_RMON		0x00000004
 #define FSL_GIANFAR_DEV_HAS_MULTI_INTR		0x00000008
+#define FSL_GIANFAR_DEV_HAS_CSUM		0x00000010
+#define FSL_GIANFAR_DEV_HAS_VLAN		0x00000020
+#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH	0x00000040
+#define FSL_GIANFAR_DEV_HAS_PADDING		0x00000080
 
 /* Flags in gianfar_platform_data */
-#define FSL_GIANFAR_BRD_HAS_PHY_INTR	0x00000001	/* if not set use a timer */
+#define FSL_GIANFAR_BRD_HAS_PHY_INTR	0x00000001 /* set or use a timer */
+#define FSL_GIANFAR_BRD_IS_REDUCED	0x00000002 /* Set if RGMII, RMII */
 
 struct fsl_i2c_platform_data {
 	/* device specific information */
-- 
cgit v1.2.3


From 22329b511a97557b293583194037d1f4c71e1504 Mon Sep 17 00:00:00 2001
From: Brent Casavant <bcasavan@sgi.com>
Date: Tue, 21 Jun 2005 17:15:59 -0700
Subject: [PATCH] ioc4: Core driver rewrite

This series of patches reworks the configuration and internal structure
of the SGI IOC4 I/O controller device drivers.

These changes are motivated by several factors:

- The IOC4 chip PCI resources are of mixed use between functions (i.e.
  multiple functions are handled in the same address range, sometimes
  within the same register), muddling resource ownership and initialization
  issues.  Centralizing this ownership in a core driver is desirable.

- The IOC4 chip implements multiple functions (serial, IDE, others not
  yet implemented in the mainline kernel) but is not a multifunction
  PCI device.  In order to properly handle device addition and removal
  as well as module insertion and deletion, an intermediary IOC4-specific
  driver layer is needed to handle these operations cleanly.

- All IOC4 drivers are currently enabled by a single CONFIG value.  As
  not all systems need all IOC4 functions, it is desireable to enable
  these drivers independently.

- The current IOC4 core driver will trigger loading of all function-level
  drivers, as it makes direct calls to them.  This situation should be
  reversed (i.e. function-level drivers cause loading of core driver)
  in order to maintain a clear and least-surprise driver loading model.

- IOC4 hardware design necessitates some driver-level dependency on
  the PCI bus clock speed.  Current code assumes a 66MHz bus, but the
  speed should be autodetected and appropriate compensation taken.

This patch series effects the above changes by a newly and better designed
IOC4 core driver with which the function-level drivers can register and
deregister themselves upon module insertion/removal.  By tracking these
modules, device addition/removal is also handled properly.  PCI resource
management and ownership issues are centralized in this core driver, and
IOC4-wide configuration actions such as bus speed detection are also
handled in this core driver.

This patch:

The SGI IOC4 I/O controller chip implements multiple functions, though it is
not a multi-function PCI device.  Additionally, various PCI resources of the
IOC4 are shared by multiple hardware functions, and thus resource ownership by
driver is not clearly delineated.  Due to the current driver design, all core
and subordinate drivers must be loaded, or none, which is undesirable if not
all IOC4 hardware features are being used.

This patch reorganizes the IOC4 drivers so that the core driver provides a
subdriver registration service.  Through appropriate callbacks the subdrivers
can now handle device addition and removal, as well as module insertion and
deletion (though the IOC4 IDE driver requires further work before module
deletion will work).  The core driver now takes care of allocating PCI
resources and data which must be shared between subdrivers, to clearly
delineate module ownership of these items.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Acked-by: Pat Gefre <pfg@sgi.com
Acked-by: Jeremy Higdon <jeremy@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ioc4.h        | 156 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ioc4_common.h |  21 ------
 2 files changed, 156 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/ioc4.h
 delete mode 100644 include/linux/ioc4_common.h

(limited to 'include/linux')

diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h
new file mode 100644
index 000000000000..729bfa4c4ac5
--- /dev/null
+++ b/include/linux/ioc4.h
@@ -0,0 +1,156 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#ifndef _LINUX_IOC4_H
+#define _LINUX_IOC4_H
+
+#include <linux/interrupt.h>
+
+/***********************************
+ * Structures needed by subdrivers *
+ ***********************************/
+
+/* This structure fully describes the IOC4 miscellaneous registers which
+ * appear at bar[0]+0x00000 through bar[0]+0x0005c.  The corresponding
+ * PCI resource is managed by the main IOC4 driver because it contains
+ * registers of interest to many different IOC4 subdrivers.
+ */
+struct ioc4_misc_regs {
+	/* Miscellaneous IOC4 registers */
+	union ioc4_pci_err_addr_l {
+		uint32_t raw;
+		struct {
+			uint32_t valid:1;	/* Address captured */
+			uint32_t master_id:4;	/* Unit causing error
+						 * 0/1: Serial port 0 TX/RX
+						 * 2/3: Serial port 1 TX/RX
+						 * 4/5: Serial port 2 TX/RX
+						 * 6/7: Serial port 3 TX/RX
+						 * 8: ATA/ATAPI
+						 * 9-15: Undefined
+						 */
+			uint32_t mul_err:1;	/* Multiple errors occurred */
+			uint32_t addr:26;	/* Bits 31-6 of error addr */
+		} fields;
+	} pci_err_addr_l;
+	uint32_t pci_err_addr_h;	/* Bits 63-32 of error addr */
+	union ioc4_sio_int {
+		uint32_t raw;
+		struct {
+			uint8_t tx_mt:1;	/* TX ring buffer empty */
+			uint8_t rx_full:1;	/* RX ring buffer full */
+			uint8_t rx_high:1;	/* RX high-water exceeded */
+			uint8_t rx_timer:1;	/* RX timer has triggered */
+			uint8_t delta_dcd:1;	/* DELTA_DCD seen */
+			uint8_t delta_cts:1;	/* DELTA_CTS seen */
+			uint8_t intr_pass:1;	/* Interrupt pass-through */
+			uint8_t tx_explicit:1;	/* TX, MCW, or delay complete */
+		} fields[4];
+	} sio_ir;		/* Serial interrupt state */
+	union ioc4_other_int {
+		uint32_t raw;
+		struct {
+			uint32_t ata_int:1;	/* ATA port passthru */
+			uint32_t ata_memerr:1;	/* ATA halted by mem error */
+			uint32_t memerr:4;	/* Serial halted by mem err */
+			uint32_t kbd_int:1;	/* kbd/mouse intr asserted */
+			uint32_t reserved:16;	/* zero */
+			uint32_t rt_int:1;	/* INT_OUT section latch */
+			uint32_t gen_int:8;	/* Intr. from generic pins */
+		} fields;
+	} other_ir;		/* Other interrupt state */
+	union ioc4_sio_int sio_ies;	/* Serial interrupt enable set */
+	union ioc4_other_int other_ies;	/* Other interrupt enable set */
+	union ioc4_sio_int sio_iec;	/* Serial interrupt enable clear */
+	union ioc4_other_int other_iec;	/* Other interrupt enable clear */
+	union ioc4_sio_cr {
+		uint32_t raw;
+		struct {
+			uint32_t cmd_pulse:4;	/* Bytebus strobe width */
+			uint32_t arb_diag:3;	/* PCI bus requester */
+			uint32_t sio_diag_idle:1;	/* Active ser req? */
+			uint32_t ata_diag_idle:1;	/* Active ATA req? */
+			uint32_t ata_diag_active:1;	/* ATA req is winner */
+			uint32_t reserved:22;	/* zero */
+		} fields;
+	} sio_cr;
+	uint32_t unused1;
+	union ioc4_int_out {
+		uint32_t raw;
+		struct {
+			uint32_t count:16;	/* Period control */
+			uint32_t mode:3;	/* Output signal shape */
+			uint32_t reserved:11;	/* zero */
+			uint32_t diag:1;	/* Timebase control */
+			uint32_t int_out:1;	/* Current value */
+		} fields;
+	} int_out;		/* External interrupt output control */
+	uint32_t unused2;
+	union ioc4_gpcr {
+		uint32_t raw;
+		struct {
+			uint32_t dir:8;	/* Pin direction */
+			uint32_t edge:8;	/* Edge/level mode */
+			uint32_t reserved1:4;	/* zero */
+			uint32_t int_out_en:1;	/* INT_OUT enable */
+			uint32_t reserved2:11;	/* zero */
+		} fields;
+	} gpcr_s;		/* Generic PIO control set */
+	union ioc4_gpcr gpcr_c;	/* Generic PIO control clear */
+	union ioc4_gpdr {
+		uint32_t raw;
+		struct {
+			uint32_t gen_pin:8;	/* State of pins */
+			uint32_t reserved:24;
+		} fields;
+	} gpdr;			/* Generic PIO data */
+	uint32_t unused3;
+	union ioc4_gppr {
+		uint32_t raw;
+		struct {
+			uint32_t gen_pin:1;	/* Single pin state */
+			uint32_t reserved:31;
+		} fields;
+	} gppr[8];		/* Generic PIO pins */
+};
+
+/* One of these per IOC4
+ *
+ * The idd_serial_data field is present here, even though it's used
+ * solely by the serial subdriver, because the main IOC4 module
+ * properly owns pci_{get,set}_drvdata functionality.  This field
+ * allows that subdriver to stash its own drvdata somewhere.
+ */
+struct ioc4_driver_data {
+	struct list_head idd_list;
+	unsigned long idd_bar0;
+	struct pci_dev *idd_pdev;
+	const struct pci_device_id *idd_pci_id;
+	struct __iomem ioc4_misc_regs *idd_misc_regs;
+	void *idd_serial_data;
+};
+
+/* One per submodule */
+struct ioc4_submodule {
+	struct list_head is_list;
+	char *is_name;
+	struct module *is_owner;
+	int (*is_probe) (struct ioc4_driver_data *);
+	int (*is_remove) (struct ioc4_driver_data *);
+};
+
+#define IOC4_NUM_CARDS		8	/* max cards per partition */
+
+/**********************************
+ * Functions needed by submodules *
+ **********************************/
+
+extern int ioc4_register_submodule(struct ioc4_submodule *);
+extern void ioc4_unregister_submodule(struct ioc4_submodule *);
+
+#endif				/* _LINUX_IOC4_H */
diff --git a/include/linux/ioc4_common.h b/include/linux/ioc4_common.h
deleted file mode 100644
index b03bcc46df55..000000000000
--- a/include/linux/ioc4_common.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-#ifndef _LINUX_IOC4_COMMON_H
-#define _LINUX_IOC4_COMMON_H
-
-/* prototypes */
-
-int ioc4_serial_init(void);
-
-int ioc4_serial_attach_one(struct pci_dev *pdev, const struct
-				pci_device_id *pci_id);
-int ioc4_ide_attach_one(struct pci_dev *pdev, const struct
-				pci_device_id *pci_id);
-
-#endif	/* _LINUX_IOC4_COMMON_H */
-- 
cgit v1.2.3


From d4c477ca5448f19afaaf6c0cfd655009ea9e614d Mon Sep 17 00:00:00 2001
From: Brent Casavant <bcasavan@sgi.com>
Date: Tue, 21 Jun 2005 17:16:01 -0700
Subject: [PATCH] ioc4: PCI bus speed detection

Several hardware features of SGI's IOC4 I/O controller chip require
timing-related driver calculations dependent upon the PCI bus speed.  This
patch enables the core IOC4 driver code to detect the actual bus speed and
store a value that can later be used by the IOC4 subdrivers as needed.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Acked-by: Pat Gefre <pfg@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ioc4.h | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h
index 729bfa4c4ac5..3dd18b785ebd 100644
--- a/include/linux/ioc4.h
+++ b/include/linux/ioc4.h
@@ -11,6 +11,14 @@
 
 #include <linux/interrupt.h>
 
+/***************
+ * Definitions *
+ ***************/
+
+/* Miscellaneous values inherent to hardware */
+
+#define IOC4_EXTINT_COUNT_DIVISOR 520	/* PCI clocks per COUNT tick */
+
 /***********************************
  * Structures needed by subdrivers *
  ***********************************/
@@ -119,19 +127,34 @@ struct ioc4_misc_regs {
 	} gppr[8];		/* Generic PIO pins */
 };
 
-/* One of these per IOC4
- *
- * The idd_serial_data field is present here, even though it's used
- * solely by the serial subdriver, because the main IOC4 module
- * properly owns pci_{get,set}_drvdata functionality.  This field
- * allows that subdriver to stash its own drvdata somewhere.
- */
+/* Masks for GPCR DIR pins */
+#define IOC4_GPCR_DIR_0 0x01	/* External interrupt output */
+#define IOC4_GPCR_DIR_1 0x02	/* External interrupt input */
+#define IOC4_GPCR_DIR_2 0x04
+#define IOC4_GPCR_DIR_3 0x08	/* Keyboard/mouse presence */
+#define IOC4_GPCR_DIR_4 0x10	/* Ser. port 0 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_5 0x20	/* Ser. port 1 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_6 0x40	/* Ser. port 2 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_7 0x80	/* Ser. port 3 xcvr select (0=232, 1=422) */
+
+/* Masks for GPCR EDGE pins */
+#define IOC4_GPCR_EDGE_0 0x01
+#define IOC4_GPCR_EDGE_1 0x02	/* External interrupt input */
+#define IOC4_GPCR_EDGE_2 0x04
+#define IOC4_GPCR_EDGE_3 0x08
+#define IOC4_GPCR_EDGE_4 0x10
+#define IOC4_GPCR_EDGE_5 0x20
+#define IOC4_GPCR_EDGE_6 0x40
+#define IOC4_GPCR_EDGE_7 0x80
+
+/* One of these per IOC4 */
 struct ioc4_driver_data {
 	struct list_head idd_list;
 	unsigned long idd_bar0;
 	struct pci_dev *idd_pdev;
 	const struct pci_device_id *idd_pci_id;
 	struct __iomem ioc4_misc_regs *idd_misc_regs;
+	unsigned long count_period;
 	void *idd_serial_data;
 };
 
-- 
cgit v1.2.3


From dbce706e2550253c5ab6043f4f5dfde0cd02470f Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Tue, 21 Jun 2005 17:16:19 -0700
Subject: [PATCH] uml: add and use generic hw_controller_type->release

With Chris Wedgwood <cw@f00f.org>

Currently UML must explicitly call the UML-specific
free_irq_by_irq_and_dev() for each free_irq call it's done.

This is needed because ->shutdown and/or ->disable are only called when the
last "action" for that irq is removed.

Instead, for UML shared IRQs (UML IRQs are very often, if not always,
shared), for each dev_id some setup is done, which must be cleared on the
release of that fd.  For instance, for each open console a new instance
(i.e.  new dev_id) of the same IRQ is requested().

Exactly, a fd is stored in an array (pollfds), which is after read by a
host thread and passed to poll().  Each event registered by poll() triggers
an interrupt.  So, for each free_irq() we must remove the corresponding
host fd from the table, which we do via this -release() method.

In this patch we add an appropriate hook for this, and remove all uses of
it by pointing the hook to the said procedure; this is safe to do since the
said procedure.

Also some cosmetic improvements are included.

This is heavily based on some work by Chris Wedgwood, which however didn't
get the patch merged for something I'd call a "misunderstanding" (the need
for this patch wasn't cleanly explained, thus adding the generic hook was
felt as undesirable).

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
CC: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c3ff4d101667..b68ad80e2464 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -47,6 +47,7 @@ struct hw_interrupt_type {
 	void (*ack)(unsigned int irq);
 	void (*end)(unsigned int irq);
 	void (*set_affinity)(unsigned int irq, cpumask_t dest);
+	void (*release)(unsigned int irq, void *dev_id);
 };
 
 typedef struct hw_interrupt_type  hw_irq_controller;
-- 
cgit v1.2.3


From b77d6adc922b8bbf8b16b67f567958c42962cf88 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Tue, 21 Jun 2005 17:16:24 -0700
Subject: [PATCH] uml: make hw_controller_type->release exist only for archs
 needing it

With Chris Wedgwood <cw@f00f.org>

As suggested by Chris, we can make the "just added" method ->release
conditional to UML only (better: to archs requesting it, i.e.  only UML
currently), so that other archs don't get this unneeded crud, and if UML
won't need it any more we can kill this.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
CC: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b68ad80e2464..7fc1022be9ee 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -47,7 +47,10 @@ struct hw_interrupt_type {
 	void (*ack)(unsigned int irq);
 	void (*end)(unsigned int irq);
 	void (*set_affinity)(unsigned int irq, cpumask_t dest);
+	/* Currently used only by UML, might disappear one day.*/
+#ifdef CONFIG_IRQ_RELEASE_METHOD
 	void (*release)(unsigned int irq, void *dev_id);
+#endif
 };
 
 typedef struct hw_interrupt_type  hw_irq_controller;
-- 
cgit v1.2.3


From 8a96619145840c6eb50d85759f72d9337db411f7 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Jun 2005 17:16:41 -0700
Subject: [PATCH] autofs4: subversion bump to identify these changes

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/auto_fs4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index a1657fb99516..9343c89d843c 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	4
 
-#define AUTOFS_PROTO_SUBVERSION		6
+#define AUTOFS_PROTO_SUBVERSION		7
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
-- 
cgit v1.2.3


From f5a9951c94e7a285a3d00648e3d790a7f016bd11 Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@pentafluge.infradead.org>
Date: Tue, 21 Jun 2005 17:16:58 -0700
Subject: [PATCH] fbdev: iomove removal

Since no one is using the inbuf, outbuf of struct fb_pixmap I removed their
use in the framebuffer console.  The idea is instead move the pixmap
functionality below the accelerated functions intead of on top as the way
it is now.  If there is no objection please apply.  This is against Linus
latestr GIT tree.  Thank you.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fb.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index b468bf496547..e14942805dbd 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -524,11 +524,11 @@ struct fb_pixmap {
 	u32 offset;		/* current offset to buffer		*/
 	u32 buf_align;		/* byte alignment of each bitmap	*/
 	u32 scan_align;		/* alignment per scanline		*/
-	u32 access_align;	/* alignment per read/write		*/
+	u32 access_align;	/* alignment per read/write (bits)	*/
 	u32 flags;		/* see FB_PIXMAP_*			*/
 	/* access methods */
-	void (*outbuf)(struct fb_info *info, u8 *addr, u8 *src, unsigned int size);
-	u8   (*inbuf) (struct fb_info *info, u8 *addr);
+	void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size);
+	void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size);
 };
 
 
@@ -816,12 +816,6 @@ extern int unregister_framebuffer(struct fb_info *fb_info);
 extern int fb_prepare_logo(struct fb_info *fb_info);
 extern int fb_show_logo(struct fb_info *fb_info);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-extern void fb_iomove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
-				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-extern void fb_iomove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch,
-				u32 height);
 extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
 				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
 				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-- 
cgit v1.2.3


From 1154ea7dcd8eed758fb5ec47393a79d5a1f0bc43 Mon Sep 17 00:00:00 2001
From: Jaya Kumar <jayalk@intworks.biz>
Date: Tue, 21 Jun 2005 17:17:04 -0700
Subject: [PATCH] Framebuffer driver for Arc LCD board

Add support for the Arc monochrome LCD board.

The board uses KS108 controllers to drive individual 64x64 LCD matrices.
The board can be paneled in a variety of setups such as 2x1=128x64,
4x4=256x256 and so on.  The board/host interface is through GPIO.

Signed-off-by: Jaya Kumar <jayalk@intworks.biz>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: <linux-fbdev-devel@lists.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/arcfb.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 include/linux/arcfb.h

(limited to 'include/linux')

diff --git a/include/linux/arcfb.h b/include/linux/arcfb.h
new file mode 100644
index 000000000000..721e7654daeb
--- /dev/null
+++ b/include/linux/arcfb.h
@@ -0,0 +1,8 @@
+#ifndef __LINUX_ARCFB_H__
+#define __LINUX_ARCFB_H__
+
+#define FBIO_WAITEVENT		_IO('F', 0x88)
+#define FBIO_GETCONTROL2	_IOR('F', 0x89, size_t)
+
+#endif
+
-- 
cgit v1.2.3


From d5881eb4883ef7dd28a4dcea237714817c4b2f8e Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@www.infradead.org>
Date: Tue, 21 Jun 2005 17:17:05 -0700
Subject: [PATCH] fbdev: new pci id for chipsfb

Patch adds pci ID for CT 69000 chipset.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b8b4ebf9abf1..63e89e47b8e9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -575,6 +575,7 @@
 #define PCI_DEVICE_ID_CT_65550		0x00e0
 #define PCI_DEVICE_ID_CT_65554		0x00e4
 #define PCI_DEVICE_ID_CT_65555		0x00e5
+#define PCI_DEVICE_ID_CT_69000		0x00c0
 
 #define PCI_VENDOR_ID_MIRO		0x1031
 #define PCI_DEVICE_ID_MIRO_36050	0x5601
-- 
cgit v1.2.3


From 303b86d9913eca0cbfc3c5cb41e7006f6e13b755 Mon Sep 17 00:00:00 2001
From: Jurriaan <thunder7@xs4all.nl>
Date: Tue, 21 Jun 2005 17:17:06 -0700
Subject: [PATCH] New framebuffer fonts + updated 12x22 font available

Improve the fonts for use with the framebuffer.

I've added all the characters marked 'FIXME' in the sun12x22 font and
created a 10x18 font (based on the sun12x22 font) and a 7x14 font (based
on the vga8x16 font).

This patch is non-intrusive, no options are enabled by default so most
users won't notice a thing.

I am placing my changes under the GPL, however, I've not seen any copyright
notices on the sun12x22 font and the vga8x16 font which I derived my new
fonts from so I don't know what the copyright status is.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/font.h | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/font.h b/include/linux/font.h
index fc2d690c9d5f..8fc80a7d78ac 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -25,19 +25,23 @@ struct font_desc {
 #define VGA8x16_IDX	1
 #define PEARL8x8_IDX	2
 #define VGA6x11_IDX	3
-#define SUN8x16_IDX	4
-#define SUN12x22_IDX	5
-#define ACORN8x8_IDX	6
-#define	MINI4x6_IDX	7
+#define FONT7x14_IDX	4
+#define	FONT10x18_IDX	5
+#define SUN8x16_IDX	6
+#define SUN12x22_IDX	7
+#define ACORN8x8_IDX	8
+#define	MINI4x6_IDX	9
 
 extern struct font_desc	font_vga_8x8,
-				font_vga_8x16,
-				font_pearl_8x8,
-				font_vga_6x11,
-				font_sun_8x16,
-				font_sun_12x22,
-				font_acorn_8x8,
-				font_mini_4x6;
+			font_vga_8x16,
+			font_pearl_8x8,
+			font_vga_6x11,
+			font_7x14,
+			font_10x18,
+			font_sun_8x16,
+			font_sun_12x22,
+			font_acorn_8x8,
+			font_mini_4x6;
 
 /* Find a font with a specific name */
 
-- 
cgit v1.2.3


From f1ab5dac251bb4514607918b0019a3b3f5f5fb48 Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@pentafluge.infradead.org>
Date: Tue, 21 Jun 2005 17:17:07 -0700
Subject: [PATCH] fbdev: stack reduction

Shrink the stack when calling the drawing alignment functions.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Cc: "Antonino A. Daplas" <adaplas@hotpop.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fb.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index e14942805dbd..bc24beeed971 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -816,12 +816,9 @@ extern int unregister_framebuffer(struct fb_info *fb_info);
 extern int fb_prepare_logo(struct fb_info *fb_info);
 extern int fb_show_logo(struct fb_info *fb_info);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
+extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx,
 				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-extern void fb_sysmove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch,
-				u32 height);
+extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height);
 extern void fb_set_suspend(struct fb_info *info, int state);
 extern int fb_get_color_depth(struct fb_var_screeninfo *var);
 extern int fb_get_options(char *name, char **option);
-- 
cgit v1.2.3


From 06d91a5fe0b50c9060e70bdf7786f8a3c66249db Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:12 -0700
Subject: [PATCH] md: improve locking on 'safemode' and move superblock writes

When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write request are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md.h   | 2 +-
 include/linux/raid/md_k.h | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index a6a67d102bfa..cfde8f497d6d 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
 extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
-extern void md_write_start(mddev_t *mddev);
+extern int md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
 extern void md_handle_safemode(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c9a0d4013be7..d92db54255a3 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -15,6 +15,9 @@
 #ifndef _MD_K_H
 #define _MD_K_H
 
+/* and dm-bio-list.h is not under include/linux because.... ??? */
+#include "../../../drivers/md/dm-bio-list.h"
+
 #define MD_RESERVED       0UL
 #define LINEAR            1UL
 #define RAID0             2UL
@@ -252,6 +255,10 @@ struct mddev_s
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
 	sector_t			recovery_cp;
+
+	spinlock_t			write_lock;
+	struct bio_list			write_list;
+
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
 							 */ 
-- 
cgit v1.2.3


From 57afd89f98a990747445f01c458ecae64263b2f8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:13 -0700
Subject: [PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index d92db54255a3..bce0032decff 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -298,7 +298,7 @@ struct mdk_personality_s
 	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
 	int (*hot_remove_disk) (mddev_t *mddev, int number);
 	int (*spare_active) (mddev_t *mddev);
-	int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
+	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
 	int (*resize) (mddev_t *mddev, sector_t sectors);
 	int (*reshape) (mddev_t *mddev, int raid_disks);
 	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
-- 
cgit v1.2.3


From 32a7627cf3a35396a8e834faf34e38ae9f3b1309 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:14 -0700
Subject: [PATCH] md: optimised resync using Bitmap based intent logging

With this patch, the intent to write to some block in the array can be logged
to a bitmap file.  Each bit represents some number of sectors and is set
before any update happens, and only cleared when all writes relating to all
sectors are complete.

After an unclean shutdown, information in this bitmap can be used to optimise
resync - only sectors which could be out-of-sync need to be updated.

Also if a drive is removed and then added back into an array, the recovery can
make use of the bitmap to optimise reconstruction.  This is not implemented in
this patch.

Currently the bitmap is stored in a file which must (obviously) be stored on a
separate device.

The patch only provided infrastructure.  It does not update any personalities
to bitmap intent logging.

Md arrays can still be used with no bitmap file.  This patch has minimal
impact on such arrays.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h | 280 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/raid/md_k.h   |   4 +
 include/linux/raid/md_u.h   |   7 ++
 3 files changed, 291 insertions(+)
 create mode 100644 include/linux/raid/bitmap.h

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
new file mode 100644
index 000000000000..f785cf26cbad
--- /dev/null
+++ b/include/linux/raid/bitmap.h
@@ -0,0 +1,280 @@
+/*
+ * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+ */
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+#define BITMAP_MAJOR 3
+#define BITMAP_MINOR 38
+
+/*
+ * in-memory bitmap:
+ *
+ * Use 16 bit block counters to track pending writes to each "chunk".
+ * The 2 high order bits are special-purpose, the first is a flag indicating
+ * whether a resync is needed.  The second is a flag indicating whether a
+ * resync is active.
+ * This means that the counter is actually 14 bits:
+ *
+ * +--------+--------+------------------------------------------------+
+ * | resync | resync |               counter                          |
+ * | needed | active |                                                |
+ * |  (0-1) |  (0-1) |              (0-16383)                         |
+ * +--------+--------+------------------------------------------------+
+ *
+ * The "resync needed" bit is set when:
+ *    a '1' bit is read from storage at startup.
+ *    a write request fails on some drives
+ *    a resync is aborted on a chunk with 'resync active' set
+ * It is cleared (and resync-active set) when a resync starts across all drives
+ * of the chunk.
+ *
+ *
+ * The "resync active" bit is set when:
+ *    a resync is started on all drives, and resync_needed is set.
+ *       resync_needed will be cleared (as long as resync_active wasn't already set).
+ * It is cleared when a resync completes.
+ *
+ * The counter counts pending write requests, plus the on-disk bit.
+ * When the counter is '1' and the resync bits are clear, the on-disk
+ * bit can be cleared aswell, thus setting the counter to 0.
+ * When we set a bit, or in the counter (to start a write), if the fields is
+ * 0, we first set the disk bit and set the counter to 1.
+ *
+ * If the counter is 0, the on-disk bit is clear and the stipe is clean
+ * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep find the counter at 2, it is decremented to 1.
+ * If the sweep find the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
+ * counters as a fallback when "page" memory cannot be allocated:
+ *
+ * Normal case (page memory allocated):
+ *
+ *     page pointer (32-bit)
+ *
+ *     [ ] ------+
+ *               |
+ *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
+ *                          c1   c2    c2048
+ *
+ * Hijacked case (page memory allocation failed):
+ *
+ *     hijacked page pointer (32-bit)
+ *
+ *     [		  ][		  ] (no page memory allocated)
+ *      counter #1 (16-bit) counter #2 (16-bit)
+ *
+ */
+
+#ifdef __KERNEL__
+
+#define PAGE_BITS (PAGE_SIZE << 3)
+#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
+
+typedef __u16 bitmap_counter_t;
+#define COUNTER_BITS 16
+#define COUNTER_BIT_SHIFT 4
+#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
+#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
+
+#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
+#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
+#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
+#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
+#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
+#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
+
+/* how many counters per page? */
+#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
+/* same, except a shift value for more efficient bitops */
+#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
+/* same, except a mask value for more efficient bitops */
+#define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)
+
+#define BITMAP_BLOCK_SIZE 512
+#define BITMAP_BLOCK_SHIFT 9
+
+/* how many blocks per chunk? (this is variable) */
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
+
+/* when hijacked, the counters and bits represent even larger "chunks" */
+/* there will be 1024 chunks represented by each counter in the page pointers */
+#define PAGEPTR_BLOCK_RATIO(bitmap) \
+			(CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
+#define PAGEPTR_BLOCK_SHIFT(bitmap) \
+			(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
+#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
+
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
+
+/* map chunks (bits) to file pages - offset by the size of the superblock */
+#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
+
+#endif
+
+/*
+ * bitmap structures:
+ */
+
+#define BITMAP_MAGIC 0x6d746962
+
+/* use these for bitmap->flags and bitmap->sb->state bit-fields */
+enum bitmap_state {
+	BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
+	BITMAP_STALE  = 0x002  /* the bitmap file is out of date or had -EIO */
+};
+
+/* the superblock at the front of the bitmap file -- little endian */
+typedef struct bitmap_super_s {
+	__u32 magic;        /*  0  BITMAP_MAGIC */
+	__u32 version;      /*  4  the bitmap major for now, could change... */
+	__u8  uuid[16];     /*  8  128 bit uuid - must match md device uuid */
+	__u64 events;       /* 24  event counter for the bitmap (1)*/
+	__u64 events_cleared;/*32  event counter when last bit cleared (2) */
+	__u64 sync_size;    /* 40  the size of the md device's sync range(3) */
+	__u32 state;        /* 48  bitmap state information */
+	__u32 chunksize;    /* 52  the bitmap chunk size in bytes */
+	__u32 daemon_sleep; /* 56  seconds between disk flushes */
+
+	__u8  pad[256 - 60]; /* set to zero */
+} bitmap_super_t;
+
+/* notes:
+ * (1) This event counter is updated before the eventcounter in the md superblock
+ *    When a bitmap is loaded, it is only accepted if this event counter is equal
+ *    to, or one greater than, the event counter in the superblock.
+ * (2) This event counter is updated when the other one is *if*and*only*if* the
+ *    array is not degraded.  As bits are not cleared when the array is degraded,
+ *    this represents the last time that any bits were cleared.
+ *    If a device is being added that has an event count with this value or
+ *    higher, it is accepted as conforming to the bitmap.
+ * (3)This is the number of sectors represented by the bitmap, and is the range that
+ *    resync happens across.  For raid1 and raid5/6 it is the size of individual
+ *    devices.  For raid10 it is the size of the array.
+ */
+
+#ifdef __KERNEL__
+
+/* the in-memory bitmap is represented by bitmap_pages */
+struct bitmap_page {
+	/*
+	 * map points to the actual memory page
+	 */
+	char *map;
+	/*
+	 * in emergencies (when map cannot be alloced), hijack the map
+	 * pointer and use it as two counters itself
+	 */
+	unsigned int hijacked:1;
+	/*
+	 * count of dirty bits on the page
+	 */
+	unsigned int  count:31;
+};
+
+/* keep track of bitmap file pages that have pending writes on them */
+struct page_list {
+	struct list_head list;
+	struct page *page;
+};
+
+/* the main bitmap structure - one per mddev */
+struct bitmap {
+	struct bitmap_page *bp;
+	unsigned long pages; /* total number of pages in the bitmap */
+	unsigned long missing_pages; /* number of pages not yet allocated */
+
+	mddev_t *mddev; /* the md device that the bitmap is for */
+
+	int counter_bits; /* how many bits per block counter */
+
+	/* bitmap chunksize -- how much data does each bit represent? */
+	unsigned long chunksize;
+	unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+	unsigned long chunks; /* total number of data chunks for the array */
+
+	/* We hold a count on the chunk currently being synced, and drop
+	 * it when the last block is started.  If the resync is aborted
+	 * midway, we need to be able to drop that count, so we remember
+	 * the counted chunk..
+	 */
+	unsigned long syncchunk;
+
+	__u64	events_cleared;
+
+	/* bitmap spinlock */
+	spinlock_t lock;
+
+	struct file *file; /* backing disk file */
+	struct page *sb_page; /* cached copy of the bitmap file superblock */
+	struct page **filemap; /* list of cache pages for the file */
+	unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
+	unsigned long file_pages; /* number of pages in the file */
+
+	unsigned long flags;
+
+	/*
+	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
+	 * file, cleaning up bits and flushing out pages to disk as necessary
+	 */
+	unsigned long daemon_lastrun; /* jiffies of last run */
+	unsigned long daemon_sleep; /* how many seconds between updates? */
+
+	/*
+	 * bitmap write daemon - this daemon performs writes to the bitmap file
+	 * this thread is only needed because of a limitation in ext3 (jbd)
+	 * that does not allow a task to have two journal transactions ongoing
+	 * simultaneously (even if the transactions are for two different
+	 * filesystems) -- in the case of bitmap, that would be the filesystem
+	 * that the bitmap file resides on and the filesystem that is mounted
+	 * on the md device -- see current->journal_info in jbd/transaction.c
+	 */
+	mdk_thread_t *writeback_daemon;
+	spinlock_t write_lock;
+	struct semaphore write_ready;
+	struct semaphore write_done;
+	unsigned long writes_pending;
+	wait_queue_head_t write_wait;
+	struct list_head write_pages;
+	struct list_head complete_pages;
+	mempool_t *write_pool;
+};
+
+/* the bitmap API */
+
+/* these are used only by md/bitmap */
+int  bitmap_create(mddev_t *mddev);
+void bitmap_destroy(mddev_t *mddev);
+int  bitmap_active(struct bitmap *bitmap);
+
+char *file_path(struct file *file, char *buf, int count);
+void bitmap_print_sb(struct bitmap *bitmap);
+int bitmap_update_sb(struct bitmap *bitmap);
+
+int  bitmap_setallbits(struct bitmap *bitmap);
+
+/* these are exported */
+int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
+void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
+		     int success);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+void bitmap_close_sync(struct bitmap *bitmap);
+
+int bitmap_unplug(struct bitmap *bitmap);
+int bitmap_daemon_work(struct bitmap *bitmap);
+#endif
+
+#endif
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index bce0032decff..16e94a9f0f8c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -267,6 +267,9 @@ struct mddev_s
 	atomic_t			writes_pending; 
 	request_queue_t			*queue;	/* for plugging ... */
 
+	struct bitmap                   *bitmap; /* the bitmap for the device */
+	struct file			*bitmap_file; /* the bitmap file */
+
 	struct list_head		all_mddevs;
 };
 
@@ -341,6 +344,7 @@ typedef struct mdk_thread_s {
 	unsigned long           flags;
 	struct completion	*event;
 	struct task_struct	*tsk;
+	unsigned long		timeout;
 	const char		*name;
 } mdk_thread_t;
 
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h
index a2df5c2a42af..81da20ccec4d 100644
--- a/include/linux/raid/md_u.h
+++ b/include/linux/raid/md_u.h
@@ -23,6 +23,7 @@
 #define GET_DISK_INFO		_IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
 #define PRINT_RAID_DEBUG	_IO (MD_MAJOR, 0x13)
 #define RAID_AUTORUN		_IO (MD_MAJOR, 0x14)
+#define GET_BITMAP_FILE		_IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
 
 /* configuration */
 #define CLEAR_ARRAY		_IO (MD_MAJOR, 0x20)
@@ -36,6 +37,7 @@
 #define HOT_ADD_DISK		_IO (MD_MAJOR, 0x28)
 #define SET_DISK_FAULTY		_IO (MD_MAJOR, 0x29)
 #define HOT_GENERATE_ERROR	_IO (MD_MAJOR, 0x2a)
+#define SET_BITMAP_FILE		_IOW (MD_MAJOR, 0x2b, int)
 
 /* usage */
 #define RUN_ARRAY		_IOW (MD_MAJOR, 0x30, mdu_param_t)
@@ -106,6 +108,11 @@ typedef struct mdu_start_info_s {
 
 } mdu_start_info_t;
 
+typedef struct mdu_bitmap_file_s
+{
+	char pathname[4096];
+} mdu_bitmap_file_t;
+
 typedef struct mdu_param_s
 {
 	int			personality;	/* 1,2,3,4 */
-- 
cgit v1.2.3


From 77ad4bc706fe6c52ab953f31c287a6af712d080c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:21 -0700
Subject: [PATCH] md: enable the bitmap write-back daemon and wait for it.

Currently we don't wait for updates to the bitmap to be flushed to disk
properly.  The infrastructure all there, but it isn't being used....

A separate kernel thread (bitmap_writeback_daemon) is needed to wait for each
page as we cannot get callbacks when a page write completes.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index f785cf26cbad..cfe60cfc8f3d 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -233,21 +233,12 @@ struct bitmap {
 	unsigned long daemon_sleep; /* how many seconds between updates? */
 
 	/*
-	 * bitmap write daemon - this daemon performs writes to the bitmap file
-	 * this thread is only needed because of a limitation in ext3 (jbd)
-	 * that does not allow a task to have two journal transactions ongoing
-	 * simultaneously (even if the transactions are for two different
-	 * filesystems) -- in the case of bitmap, that would be the filesystem
-	 * that the bitmap file resides on and the filesystem that is mounted
-	 * on the md device -- see current->journal_info in jbd/transaction.c
+	 * bitmap_writeback_daemon waits for file-pages that have been written,
+	 * as there is no way to get a call-back when a page write completes.
 	 */
 	mdk_thread_t *writeback_daemon;
 	spinlock_t write_lock;
-	struct semaphore write_ready;
-	struct semaphore write_done;
-	unsigned long writes_pending;
 	wait_queue_head_t write_wait;
-	struct list_head write_pages;
 	struct list_head complete_pages;
 	mempool_t *write_pool;
 };
-- 
cgit v1.2.3


From 191ea9b2c7cc3ebbe0678834ab710d7d95ad3f9a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:23 -0700
Subject: [PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/raid1.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index abbfdd9afe1e..9d93cf12e890 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -36,12 +36,21 @@ struct r1_private_data_s {
 	spinlock_t		device_lock;
 
 	struct list_head	retry_list;
+	/* queue pending writes and submit them on unplug */
+	struct bio_list		pending_bio_list;
+	/* queue of writes that have been unplugged */
+	struct bio_list		flushing_bio_list;
+
 	/* for use when syncing mirrors: */
 
 	spinlock_t		resync_lock;
-	int nr_pending;
-	int barrier;
+	int			nr_pending;
+	int			barrier;
 	sector_t		next_resync;
+	int			fullsync;  /* set to 1 if a full sync is needed,
+					    * (fresh device added).
+					    * Cleared when a sync completes.
+					    */
 
 	wait_queue_head_t	wait_idle;
 	wait_queue_head_t	wait_resume;
@@ -85,14 +94,17 @@ struct r1bio_s {
 	int			read_disk;
 
 	struct list_head	retry_list;
+	struct bitmap_update	*bitmap_update;
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.
 	 */
 	struct bio		*bios[0];
+	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
 };
 
 /* bits for r1bio.state */
 #define	R1BIO_Uptodate	0
 #define	R1BIO_IsSync	1
+#define	R1BIO_Degraded	2
 #endif
-- 
cgit v1.2.3


From 41158c7eb22312cfaa256744e1553bb4042ff085 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:25 -0700
Subject: [PATCH] md: optimise reconstruction when re-adding a recently failed
 drive.

When an array is degraded, bit in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the block
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 16e94a9f0f8c..6cdcb4434c6c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -183,6 +183,10 @@ struct mdk_rdev_s
 
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
+	int saved_raid_disk;		/* role that device used to have in the
+					 * array and could again if we did a partial
+					 * resync from the bitmap
+					 */
 
 	atomic_t	nr_pending;	/* number of pending requests.
 					 * only maintained for arrays that
-- 
cgit v1.2.3


From 3d310eb7b3df1252e8595d059d982b0a9825a137 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:26 -0700
Subject: [PATCH] md: fix deadlock due to md thread processing delayed
 requests.

Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.

The current code schedules this up and queues the write request for later
handling by the md_thread.

However some personalities (Raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.

So this patch changes things so the processes submitting the request waits
for the superblock to be written and then submits the request itself.

This fixes a recently-created deadlock in raid5/raid6

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md.h   | 2 +-
 include/linux/raid/md_k.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index cfde8f497d6d..75f41d8faed2 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
 extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
-extern int md_write_start(mddev_t *mddev, struct bio *bi);
+extern void md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
 extern void md_handle_safemode(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 6cdcb4434c6c..3e977025cf43 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -261,7 +261,7 @@ struct mddev_s
 	sector_t			recovery_cp;
 
 	spinlock_t			write_lock;
-	struct bio_list			write_list;
+	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
 
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
-- 
cgit v1.2.3


From a654b9d8f851f4ca02649d5825cbe6c608adb10c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:27 -0700
Subject: [PATCH] md: allow md intent bitmap to be stored near the superblock.

This provides an alternate to storing the bitmap in a separate file.  The
bitmap can be stored at a given offset from the superblock.  Obviously the
creator of the array must make sure this doesn't intersect with data....
After is good for version-0.90 superblocks.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h |  2 ++
 include/linux/raid/md.h     | 15 ++++++++++++++-
 include/linux/raid/md_k.h   |  4 ++++
 include/linux/raid/md_p.h   |  7 ++++++-
 4 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index cfe60cfc8f3d..e24b74b11150 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -217,6 +217,7 @@ struct bitmap {
 	/* bitmap spinlock */
 	spinlock_t lock;
 
+	long offset; /* offset from superblock if file is NULL */
 	struct file *file; /* backing disk file */
 	struct page *sb_page; /* cached copy of the bitmap file superblock */
 	struct page **filemap; /* list of cache pages for the file */
@@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap);
 int bitmap_update_sb(struct bitmap *bitmap);
 
 int  bitmap_setallbits(struct bitmap *bitmap);
+void bitmap_write_all(struct bitmap *bitmap);
 
 /* these are exported */
 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 75f41d8faed2..ffa316ce4dc8 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -60,7 +60,14 @@
  */
 #define MD_MAJOR_VERSION                0
 #define MD_MINOR_VERSION                90
-#define MD_PATCHLEVEL_VERSION           1
+/*
+ * MD_PATCHLEVEL_VERSION indicates kernel functionality.
+ * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
+ *     and major_version/minor_version accordingly
+ * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
+ *     in the super status byte
+ */
+#define MD_PATCHLEVEL_VERSION           2
 
 extern int register_md_personality (int p_num, mdk_personality_t *p);
 extern int unregister_md_personality (int p_num);
@@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev);
 
 extern void md_print_devices (void);
 
+extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+			   sector_t sector, int size, struct page *page);
+extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+			struct page *page, int rw);
+
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 #endif 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 3e977025cf43..a3725b57fb7d 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -273,6 +273,10 @@ struct mddev_s
 
 	struct bitmap                   *bitmap; /* the bitmap for the device */
 	struct file			*bitmap_file; /* the bitmap file */
+	long				bitmap_offset; /* offset from superblock of
+							* start of bitmap. May be
+							* negative, but not '0'
+							*/
 
 	struct list_head		all_mddevs;
 };
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8ba95d67329f..8e592a25a8b5 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -96,6 +96,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_CLEAN		0
 #define MD_SB_ERRORS		1
 
+#define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
 typedef struct mdp_superblock_s {
 	/*
 	 * Constant generic information
@@ -184,7 +185,7 @@ struct mdp_superblock_1 {
 	/* constant array information - 128 bytes */
 	__u32	magic;		/* MD_SB_MAGIC: 0xa92b4efc - little endian */
 	__u32	major_version;	/* 1 */
-	__u32	feature_map;	/* 0 for now */
+	__u32	feature_map;	/* bit 0 set if 'bitmap_offset' is meaningful */
 	__u32	pad0;		/* always set to 0 when writing */
 
 	__u8	set_uuid[16];	/* user-space generated. */
@@ -197,6 +198,10 @@ struct mdp_superblock_1 {
 
 	__u32	chunksize;	/* in 512byte sectors */
 	__u32	raid_disks;
+	__u32	bitmap_offset;	/* sectors after start of superblock that bitmap starts
+				 * NOTE: signed, so bitmap can be before superblock
+				 * only meaningful of feature_map[0] is set.
+				 */
 	__u8	pad1[128-96];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
-- 
cgit v1.2.3


From 7bfa19f2748000d646dbdf8f48258cfe1d257b52 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:28 -0700
Subject: [PATCH] md: allow md to update multiple superblocks in parallel.

currently, md updates all superblocks (one on each device) in series.  It
waits for one write to complete before starting the next.  This isn't a big
problem as superblock updates don't happen that often.

However it is neater to do it in parallel, and if the drives in the array have
gone to "sleep" after a period of idleness, then waking them is parallel is
faster (and someone else should be worrying about power drain).

Futher, we will need parallel superblock updates for a future patch which
keeps the intent-logging bitmap near the superblock.

Also remove the silly code that retired superblock updates 100 times.  This
simply never made sense.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a3725b57fb7d..8c14ba565a45 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -262,6 +262,7 @@ struct mddev_s
 
 	spinlock_t			write_lock;
 	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
+	atomic_t			pending_writes;	/* number of active superblock writes */
 
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
-- 
cgit v1.2.3


From 39730960d94306d7be414e8d54f4e5c071af1278 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:28 -0700
Subject: [PATCH] Two small fixes for md verion-1 superblocks.

1/ Must typecast int to (sector_t) before inverting or we
 might not invert enough bits.

2/ When "bitmap_offset" was added to mdp_superblock_1, we didn't increase
   the count of words-used (96 to 100).

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_p.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8e592a25a8b5..dc65cd435494 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -202,7 +202,7 @@ struct mdp_superblock_1 {
 				 * NOTE: signed, so bitmap can be before superblock
 				 * only meaningful of feature_map[0] is set.
 				 */
-	__u8	pad1[128-96];	/* set to 0 when written */
+	__u8	pad1[128-100];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
 	__u64	data_offset;	/* sector start of data, often 0 */
-- 
cgit v1.2.3


From b3d5496ea5915fa4848fe307af9f7097f312e932 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 2 Apr 2005 20:31:02 +0200
Subject: [PATCH] I2C: Kill address ranges in non-sensors i2c chip drivers

Some months ago, you killed the address ranges mechanism from all
sensors i2c chip drivers (both the module parameters and the in-code
address lists). I think it was a very good move, as the ranges can
easily be replaced by individual addresses, and this allowed for
significant cleanups in the i2c core (let alone the impressive size
shrink for all these drivers).

Unfortunately you did not do the same for non-sensors i2c chip drivers.
These need the address ranges even less, so we could get rid of the
ranges here as well for another significant i2c core cleanup. Here comes
a patch which does just that. Since the process is exactly the same as
what you did for the other drivers set already, I did not split this one
in parts.

A documentation update is included.

The change saves 308 bytes in the i2c core, and an average 1382 bytes
for chip drivers which use I2C_CLIENT_INSMOD, 126 bytes for those which
do not.

This change is required if we want to merge the sensors and non-sensors
i2c code (and we want to do this).

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Index: gregkh-2.6/Documentation/i2c/writing-clients
===================================================================
---
 include/linux/i2c.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index ebcd745f4cd6..be837b13f297 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -290,11 +290,8 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
  */
 struct i2c_client_address_data {
 	unsigned short *normal_i2c;
-	unsigned short *normal_i2c_range;
 	unsigned short *probe;
-	unsigned short *probe_range;
 	unsigned short *ignore;
-	unsigned short *ignore_range;
 	unsigned short *force;
 };
 
@@ -563,24 +560,15 @@ union i2c_smbus_data {
 #define I2C_CLIENT_INSMOD \
   I2C_CLIENT_MODULE_PARM(probe, \
                       "List of adapter,address pairs to scan additionally"); \
-  I2C_CLIENT_MODULE_PARM(probe_range, \
-                      "List of adapter,start-addr,end-addr triples to scan " \
-                      "additionally"); \
   I2C_CLIENT_MODULE_PARM(ignore, \
                       "List of adapter,address pairs not to scan"); \
-  I2C_CLIENT_MODULE_PARM(ignore_range, \
-                      "List of adapter,start-addr,end-addr triples not to " \
-                      "scan"); \
   I2C_CLIENT_MODULE_PARM(force, \
                       "List of adapter,address pairs to boldly assume " \
                       "to be present"); \
 	static struct i2c_client_address_data addr_data = {		\
 			.normal_i2c = 		normal_i2c,		\
-			.normal_i2c_range =	normal_i2c_range,	\
 			.probe =		probe,			\
-			.probe_range =		probe_range,		\
 			.ignore =		ignore,			\
-			.ignore_range =		ignore_range,		\
 			.force =		force,			\
 		}
 
-- 
cgit v1.2.3


From 3886246a257e828248ce1e72ced00408a3557f0d Mon Sep 17 00:00:00 2001
From: Sebastian Witt <se.witt@gmx.net>
Date: Wed, 13 Apr 2005 22:25:39 +0200
Subject: [PATCH] I2C: i2c-vid.h: Support for VID to reg conversion

Adds conversion from VID (mV) to register value. Used by the atxp1 I2C module.
Removed uneeded switch case.

Signed-off-by: Sebastian Witt <se.witt@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-vid.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-vid.h b/include/linux/i2c-vid.h
index 974835e3530f..41d0635e0ba9 100644
--- a/include/linux/i2c-vid.h
+++ b/include/linux/i2c-vid.h
@@ -97,3 +97,15 @@ static inline int vid_from_reg(int val, int vrm)
 		                     2050 - (val) * 50);
 	}
 }
+
+static inline int vid_to_reg(int val, int vrm)
+{
+	switch (vrm) {
+	case 91:		/* VRM 9.1 */
+	case 90:		/* VRM 9.0 */
+		return ((val >= 1100) && (val <= 1850) ?
+			((18499 - val * 10) / 25 + 5) / 10 : -1);
+	default:
+		return -1;
+	}
+}
-- 
cgit v1.2.3


From 10c08f8100ee2c4d27b862635574cdf4ef439e67 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 6 Jun 2005 19:34:45 +0200
Subject: [PATCH] I2C: rename i2c-sysfs.h to hwmon-sysfs.h

This patch renames the new linux/i2c-sysfs.h header file to
linux/hwmon-sysfs.h. This names seems to be more appropriate since this
file defines macros and structures not related to i2c but to hardware
monitoring drivers. The patch also updates the five hardware monitoring
driver which include that header file already.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/hwmon-sysfs.h | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c-sysfs.h   | 36 ------------------------------------
 2 files changed, 36 insertions(+), 36 deletions(-)
 create mode 100644 include/linux/hwmon-sysfs.h
 delete mode 100644 include/linux/i2c-sysfs.h

(limited to 'include/linux')

diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
new file mode 100644
index 000000000000..1b5018a965f5
--- /dev/null
+++ b/include/linux/hwmon-sysfs.h
@@ -0,0 +1,36 @@
+/*
+ *  hwmon-sysfs.h - hardware monitoring chip driver sysfs defines
+ *
+ *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_HWMON_SYSFS_H
+#define _LINUX_HWMON_SYSFS_H
+
+struct sensor_device_attribute{
+	struct device_attribute dev_attr;
+	int index;
+};
+#define to_sensor_dev_attr(_dev_attr) \
+	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
+
+#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
+struct sensor_device_attribute sensor_dev_attr_##_name = {	\
+	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
+	.index =	_index,					\
+}
+
+#endif /* _LINUX_HWMON_SYSFS_H */
diff --git a/include/linux/i2c-sysfs.h b/include/linux/i2c-sysfs.h
deleted file mode 100644
index d7bf6ce11679..000000000000
--- a/include/linux/i2c-sysfs.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  i2c-sysfs.h - i2c chip driver sysfs defines
- *
- *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#ifndef _LINUX_I2C_SYSFS_H
-#define _LINUX_I2C_SYSFS_H
-
-struct sensor_device_attribute{
-	struct device_attribute dev_attr;
-	int index;
-};
-#define to_sensor_dev_attr(_dev_attr) \
-	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
-
-#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
-struct sensor_device_attribute sensor_dev_attr_##_name = {	\
-	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
-	.index =	_index,					\
-}
-
-#endif /* _LINUX_I2C_SYSFS_H */
-- 
cgit v1.2.3


From c124a78d8c7475ecc43f385f34112b638c4228d9 Mon Sep 17 00:00:00 2001
From: Randy Vinson <rvinson@mvista.com>
Date: Fri, 3 Jun 2005 14:36:06 -0700
Subject: [PATCH] I2C: Add support for Maxim/Dallas DS1374 Real-Time Clock Chip
 (1/2)

Add support for Maxim/Dallas DS1374 Real-Time Clock Chip

This change adds support for the Maxim/Dallas DS1374 RTC chip. This chip
is an I2C-based RTC that maintains a simple 32-bit binary seconds count
with battery backup support.

Signed-off-by: Randy Vinson <rvinson@mvista.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-id.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 89270ce51470..33f08258f22b 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -108,6 +108,7 @@
 #define I2C_DRIVERID_TDA7313	62	/* TDA7313 audio processor	*/
 #define I2C_DRIVERID_MAX6900	63	/* MAX6900 real-time clock	*/
 #define I2C_DRIVERID_SAA7114H	64	/* video decoder		*/
+#define I2C_DRIVERID_DS1374	65	/* DS1374 real time clock	*/
 
 
 #define I2C_DRIVERID_EXP0	0xF0	/* experimental use id's	*/
-- 
cgit v1.2.3


From 5ee0ed7d3ab620a764740fb018f469d45f561931 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:20 +0000
Subject: [PATCH] RPC: Make rpc_create_client() probe server for RPC
 program+version support

 Ensure that we don't create an RPC client without checking that the server
 does indeed support the RPC program + version that we are trying to set up.

 This enables us to immediately return an error to "mount" if it turns out
 that the server is only supporting NFSv2, when we requested NFSv3 or NFSv4.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 2709caf4d128..d25e80f77ff5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -111,6 +111,9 @@ struct rpc_procinfo {
 struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
+				struct rpc_program *info,
+				u32 version, rpc_authflavor_t authflavor);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
@@ -129,6 +132,7 @@ void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
+int		rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static __inline__
 int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-- 
cgit v1.2.3


From 4ce79717ce32a9f88c1ddce4b9658556cb59d37a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Header file cleanup...

 - Move NFSv4 state definitions into a private header file.
 - Clean up gunk in nfs_fs.h

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 241 -------------------------------------------------
 1 file changed, 241 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index dbac7f363e5d..fb33e7655cfa 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -15,7 +15,6 @@
 #include <linux/pagemap.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
-#include <linux/uio.h>
 
 #include <linux/nfs_fs_sb.h>
 
@@ -29,7 +28,6 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_xdr.h>
 #include <linux/rwsem.h>
-#include <linux/workqueue.h>
 #include <linux/mempool.h>
 
 /*
@@ -43,13 +41,6 @@
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
-/*
- * The upper limit on timeouts for the exponential backoff algorithm.
- */
-#define NFS_WRITEBACK_DELAY		(5*HZ)
-#define NFS_WRITEBACK_LOCKDELAY		(60*HZ)
-#define NFS_COMMIT_DELAY		(5*HZ)
-
 /*
  * superblock magic number for NFS
  */
@@ -60,9 +51,6 @@
  */
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
 
-#define NFS_RW_SYNC		0x0001	/* O_SYNC handling */
-#define NFS_RW_SWAP		0x0002	/* This is a swap request */
-
 /*
  * When flushing a cluster of dirty pages, there can be different
  * strategies:
@@ -434,11 +422,6 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-/* Hack for future NFS swap support */
-#ifndef IS_SWAPFILE
-# define IS_SWAPFILE(inode)	(0)
-#endif
-
 /*
  * linux/fs/nfs/read.c
  */
@@ -515,230 +498,6 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
-#ifdef CONFIG_NFS_V4
-
-struct idmap;
-
-/*
- * In a seqid-mutating op, this macro controls which error return
- * values trigger incrementation of the seqid.
- *
- * from rfc 3010:
- * The client MUST monotonically increment the sequence number for the
- * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
- * operations.  This is true even in the event that the previous
- * operation that used the sequence number received an error.  The only
- * exception to this rule is if the previous operation received one of
- * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
- * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
- * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
- *
- */
-#define seqid_mutating_err(err)       \
-(((err) != NFSERR_STALE_CLIENTID) &&  \
- ((err) != NFSERR_STALE_STATEID)  &&  \
- ((err) != NFSERR_BAD_STATEID)    &&  \
- ((err) != NFSERR_BAD_SEQID)      &&  \
- ((err) != NFSERR_BAD_XDR)        &&  \
- ((err) != NFSERR_RESOURCE)       &&  \
- ((err) != NFSERR_NOFILEHANDLE))
-
-enum nfs4_client_state {
-	NFS4CLNT_OK  = 0,
-};
-
-/*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-	struct rpc_cred *	cl_cred;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	wait_queue_head_t	cl_waitq;
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
- * NFS4 state_owners and lock_owners are simply labels for ordered
- * sequences of RPC calls. Their sole purpose is to provide once-only
- * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
- */
-struct nfs4_state_owner {
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
-	atomic_t	     so_count;
-
-	struct rpc_cred	     *so_cred;	 /* Associated cred */
-	struct list_head     so_states;
-	struct list_head     so_delegations;
-};
-
-/*
- * struct nfs4_state maintains the client-side state for a given
- * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
- *
- * OPEN:
- * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
- * we need to know how many files are open for reading or writing on a
- * given inode. This information too is stored here.
- *
- * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
- */
-
-struct nfs4_lock_state {
-	struct list_head	ls_locks;	/* Other lock stateids */
-	fl_owner_t		ls_owner;	/* POSIX lock owner */
-#define NFS_LOCK_INITIALIZED 1
-	int			ls_flags;
-	u32			ls_seqid;
-	u32			ls_id;
-	nfs4_stateid		ls_stateid;
-	atomic_t		ls_count;
-};
-
-/* bits for nfs4_state->flags */
-enum {
-	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
-};
-
-struct nfs4_state {
-	struct list_head open_states;	/* List of states for the same state_owner */
-	struct list_head inode_states;	/* List of states for the same inode */
-	struct list_head lock_states;	/* List of subservient lock stateids */
-
-	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
-	struct inode *inode;		/* Pointer to the inode */
-
-	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
-	rwlock_t state_lock;		/* Protects the lock_states list */
-
-	nfs4_stateid stateid;
-
-	unsigned int nreaders;
-	unsigned int nwriters;
-	int state;			/* State on the server (R,W, or RW) */
-	atomic_t count;
-};
-
-
-struct nfs4_exception {
-	long timeout;
-	int retry;
-};
-
-struct nfs4_state_recovery_ops {
-	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
-	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
-};
-
-extern struct dentry_operations nfs4_dentry_operations;
-extern struct inode_operations nfs4_dir_inode_operations;
-
-/* nfs4proc.c */
-extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
-extern int nfs4_proc_async_renew(struct nfs4_client *);
-extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
-
-extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
-extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
-
-/* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
-extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
-
-/* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern int nfs4_init_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
-
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
-extern void nfs4_put_state_owner(struct nfs4_state_owner *);
-extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
-extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
-extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
-extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
-extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
-extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t);
-extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
-extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
-
-
-
-struct nfs4_mount_data;
-#else
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
-#define nfs4_close_state(a, b) do { } while (0)
-#define nfs4_renewd_prepare_shutdown(server) do { } while (0)
-#endif
-
 #endif /* __KERNEL__ */
 
 /*
-- 
cgit v1.2.3


From a656db998785324a818005bcf71bae6dcbbb3cf5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Remove unused NFS inode field readdir_timestamp.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fb33e7655cfa..68d5aae89972 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -128,7 +128,6 @@ struct nfs_inode {
 	 *
 	 *	mtime != read_cache_mtime
 	 */
-	unsigned long		readdir_timestamp;
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
 	unsigned long		attrtimeo_timestamp;
-- 
cgit v1.2.3


From 96651ab341cde0fee940ec837f323d711cbfa7d5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] RPC: Shrink struct rpc_task by switching to wait_on_bit()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 99d17ed7cebb..4d77e90d0b30 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -31,7 +31,6 @@ struct rpc_wait_queue;
 struct rpc_wait {
 	struct list_head	list;		/* wait queue links */
 	struct list_head	links;		/* Links to related tasks */
-	wait_queue_head_t	waitq;		/* sync: sleep on this q */
 	struct rpc_wait_queue *	rpc_waitq;	/* RPC wait queue we're on */
 };
 
-- 
cgit v1.2.3


From 464a98bd70bae8c559cfc82af799faf44824ce64 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: cleanup: shrink struct nfs_open_context

 Remove the wait queue, and replace the functions that depended on it
 with wait_on_bit().

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 68d5aae89972..0b01b96337f8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -84,7 +84,6 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
-	wait_queue_head_t waitq;
 };
 
 /*
-- 
cgit v1.2.3


From 92cfc62cb8412c9563860b1bf70cd4701f03092e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Allow NFS versions to support different sets of inode
 operations.

 ACL support will require supporting additional inode operations in v4
 (getxattr, setxattr, listxattr).  This patch allows different protocol versions
 to support different inode operations by adding a file_inode_ops to the
 nfs_rpc_ops (to match the existing dir_inode_ops).

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 47037d9521cb..5b45bafd9db5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -667,6 +667,7 @@ struct nfs_rpc_ops {
 	int	version;		/* Protocol version */
 	struct dentry_operations *dentry_ops;
 	struct inode_operations *dir_inode_ops;
+	struct inode_operations *file_inode_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
-- 
cgit v1.2.3


From ada70d9425bcc5e376fef8591e4e76e204c0834c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Add hooks to allow common NFS attribute code to clear
 cached acls

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h  | 1 +
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0b01b96337f8..140bdf489f71 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -189,6 +189,7 @@ struct nfs_inode {
 #define NFS_INO_INVALID_DATA	0x0010		/* cached data is invalid */
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
+#define NFS_INO_INVALID_ACL	0x0080		/* cached acls are invalid */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5b45bafd9db5..cf38db59f347 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -714,6 +714,7 @@ struct nfs_rpc_ops {
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);
 	int	(*lock)(struct file *, int, struct file_lock *);
+	void	(*clear_acl_cache)(struct inode *);
 };
 
 /*
-- 
cgit v1.2.3


From 029d105e66e5a90850d5a09dad76815d0bcfcaa3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for reading NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 reading acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h    | 1 +
 include/linux/nfs_xdr.h | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 5ca8a8d8ccdf..6ee7e2585af5 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -382,6 +382,7 @@ enum {
 	NFSPROC4_CLNT_READDIR,
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
+	NFSPROC4_CLNT_GETACL,
 };
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cf38db59f347..9f5e1d407c7b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -326,6 +326,13 @@ struct nfs_setattrargs {
 	const u32 *			bitmask;
 };
 
+struct nfs_getaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-- 
cgit v1.2.3


From 23ec6965c20db96bc8ea7af0ec178f074dd31c40 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for writing NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 writing acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h    | 1 +
 include/linux/nfs_xdr.h | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 6ee7e2585af5..5bb5b2fd7ba2 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -383,6 +383,7 @@ enum {
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
 	NFSPROC4_CLNT_GETACL,
+	NFSPROC4_CLNT_SETACL,
 };
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9f5e1d407c7b..46b206b460c0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -326,6 +326,13 @@ struct nfs_setattrargs {
 	const u32 *			bitmask;
 };
 
+struct nfs_setaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_getaclargs {
 	struct nfs_fh *			fh;
 	size_t				acl_len;
-- 
cgit v1.2.3


From e50a1c2e1f816c81eed6a589019052cb44189267 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] NFSv4: client-side caching NFSv4 ACLs

 Add nfs4_acl field to the nfs_inode, and use it to cache acls.  Only cache
 acls of size up to a page.  Also prepare for up to a page of acl data even
 when the user doesn't pass in a buffer, as when they want to get the acl
 length to decide what size buffer to allocate.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 140bdf489f71..d2b5d7e0e85a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -169,13 +169,13 @@ struct nfs_inode {
 	wait_queue_head_t	nfs_i_wait;
 
 #ifdef CONFIG_NFS_V4
+	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct nfs_delegation	*delegation;
 	int			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
-
 	struct inode		vfs_inode;
 };
 
-- 
cgit v1.2.3


From 007e251f2b2760f738c92adc8c80cbae0bed3ce5 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] RPC: Allow multiple RPC client programs to share the same
 transport

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index d25e80f77ff5..ab151bbb66df 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -114,6 +114,8 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
+				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
-- 
cgit v1.2.3


From e053d1ab62c8ef0eff3dd4c95448cad3c6d2fbf4 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <okir@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Lazy RPC receive buffer allocation

 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 541dcf838abf..0f5b7a5a7432 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -160,7 +160,7 @@ typedef struct {
 
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 
-extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		skb_reader_t *, skb_read_actor_t);
 
 struct socket;
-- 
cgit v1.2.3


From 7e06b53d796a3740307b54aa2799077f8a0c84e7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: fix accounting bug in the case of a truncated RPC
 message

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 0f5b7a5a7432..5d1eed2b58a1 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -160,7 +160,7 @@ typedef struct {
 
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 
-extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		skb_reader_t *, skb_read_actor_t);
 
 struct socket;
-- 
cgit v1.2.3


From bd8100e7eda87507649c6ba4cb32173b34e49986 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Encode and decode arbitrary XDR arrays

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5d1eed2b58a1..34ec3e8d99b3 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int);
-extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len);
+extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int);
+extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int);
 
 /*
  * Helper structure for copying from an sk_buff.
@@ -168,6 +169,22 @@ struct sockaddr;
 extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
 		struct xdr_buf *, unsigned int, int);
 
+extern int xdr_encode_word(struct xdr_buf *, int, u32);
+extern int xdr_decode_word(struct xdr_buf *, int, u32 *);
+
+struct xdr_array2_desc;
+typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
+struct xdr_array2_desc {
+	unsigned int elem_size;
+	unsigned int array_len;
+	xdr_xcode_elem_t xcode;
+};
+
+extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+                             struct xdr_array2_desc *desc);
+extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+			     struct xdr_array2_desc *desc);
+
 /*
  * Provide some simple tools for XDR buffer overflow-checking etc.
  */
-- 
cgit v1.2.3


From 9ba02638e4be28dd4ff724202a640264427c62d1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Allow the sunrpc server to multiplex serveral programs
 on a single port

 The NFS and NFSACL programs run on the same RPC transport.  This patch adds
 support for this by converting svc_program into a chained list of programs
 (server-side).

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 37003970cf2e..facb94488bb1 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -240,9 +240,10 @@ struct svc_deferred_req {
 };
 
 /*
- * RPC program
+ * List of RPC programs on the same transport endpoint
  */
 struct svc_program {
+	struct svc_program *	pg_next;	/* other programs (same xprt) */
 	u32			pg_prog;	/* program number */
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* lowest version */
-- 
cgit v1.2.3


From a257cdd0e2179630d3201c32ba14d7fcb3c3a055 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:26 +0000
Subject: [PATCH] NFSD: Add server support for NFSv3 ACLs.

 This adds functions for encoding and decoding POSIX ACLs for the NFSACL
 protocol extension, and the GETACL and SETACL RPCs.  The implementation is
 compatible with NFSACL in Solaris.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfsacl.h     | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/nfsd.h  | 16 +++++++++++++
 include/linux/nfsd/xdr.h   |  4 ++++
 include/linux/nfsd/xdr3.h  | 26 +++++++++++++++++++++
 include/linux/sunrpc/svc.h | 11 +++++++++
 5 files changed, 115 insertions(+)
 create mode 100644 include/linux/nfsacl.h

(limited to 'include/linux')

diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
new file mode 100644
index 000000000000..54487a99beb8
--- /dev/null
+++ b/include/linux/nfsacl.h
@@ -0,0 +1,58 @@
+/*
+ * File: linux/nfsacl.h
+ *
+ * (C) 2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+#ifndef __LINUX_NFSACL_H
+#define __LINUX_NFSACL_H
+
+#define NFS_ACL_PROGRAM	100227
+
+#define ACLPROC2_GETACL		1
+#define ACLPROC2_SETACL		2
+#define ACLPROC2_GETATTR	3
+#define ACLPROC2_ACCESS		4
+
+#define ACLPROC3_GETACL		1
+#define ACLPROC3_SETACL		2
+
+
+/* Flags for the getacl/setacl mode */
+#define NFS_ACL			0x0001
+#define NFS_ACLCNT		0x0002
+#define NFS_DFACL		0x0004
+#define NFS_DFACLCNT		0x0008
+
+/* Flag for Default ACL entries */
+#define NFS_ACL_DEFAULT		0x1000
+
+#ifdef __KERNEL__
+
+#include <linux/posix_acl.h>
+
+/* Maximum number of ACL entries over NFS */
+#define NFS_ACL_MAX_ENTRIES	1024
+
+#define NFSACL_MAXWORDS		(2*(2+3*NFS_ACL_MAX_ENTRIES))
+#define NFSACL_MAXPAGES		((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \
+				 >> PAGE_SHIFT)
+
+static inline unsigned int
+nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default)
+{
+	unsigned int w = 16;
+	w += max(acl_access ? (int)acl_access->a_count : 3, 4) * 12;
+	if (acl_default)
+		w += max((int)acl_default->a_count, 4) * 12;
+	return w;
+}
+
+extern unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag);
+extern unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl);
+
+#endif /* __KERNEL__ */
+#endif  /* __LINUX_NFSACL_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 8f85d9a59607..4bf931d5ff56 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -15,6 +15,7 @@
 #include <linux/unistd.h>
 #include <linux/dirent.h>
 #include <linux/fs.h>
+#include <linux/posix_acl.h>
 #include <linux/mount.h>
 
 #include <linux/nfsd/debug.h>
@@ -124,6 +125,21 @@ int		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
 
 /* 
  * NFSv4 State
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
index ecccef777dae..130d4f588a37 100644
--- a/include/linux/nfsd/xdr.h
+++ b/include/linux/nfsd/xdr.h
@@ -169,4 +169,8 @@ int nfssvc_encode_entry(struct readdir_cd *, const char *name,
 
 int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *);
 
+/* Helper functions for NFSv2 ACL code */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp);
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp);
+
 #endif /* LINUX_NFSD_H */
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h
index 0ae9e0ef5f68..21e18ce7ca63 100644
--- a/include/linux/nfsd/xdr3.h
+++ b/include/linux/nfsd/xdr3.h
@@ -110,6 +110,19 @@ struct nfsd3_commitargs {
 	__u32			count;
 };
 
+struct nfsd3_getaclargs {
+	struct svc_fh		fh;
+	int			mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 struct nfsd3_attrstat {
 	__u32			status;
 	struct svc_fh		fh;
@@ -209,6 +222,14 @@ struct nfsd3_commitres {
 	struct svc_fh		fh;
 };
 
+struct nfsd3_getaclres {
+	__u32			status;
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 /* dummy type for release */
 struct nfsd3_fhandle_pair {
 	__u32			dummy;
@@ -241,6 +262,7 @@ union nfsd3_xdrstore {
 	struct nfsd3_fsinfores		fsinfores;
 	struct nfsd3_pathconfres	pathconfres;
 	struct nfsd3_commitres		commitres;
+	struct nfsd3_getaclres		getaclres;
 };
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
@@ -316,6 +338,10 @@ int nfs3svc_encode_entry(struct readdir_cd *, const char *name,
 int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name,
 				int namlen, loff_t offset, ino_t ino,
 				unsigned int);
+/* Helper functions for NFSv3 ACL code */
+u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p,
+				struct svc_fh *fhp);
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp);
 
 
 #endif /* _LINUX_NFSD_XDR3_H */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index facb94488bb1..5af8800e0ce3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -185,6 +185,17 @@ xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
 	return vec->iov_len <= PAGE_SIZE;
 }
 
+static inline struct page *
+svc_take_res_page(struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_arghi <= rqstp->rq_argused)
+		return NULL;
+	rqstp->rq_arghi--;
+	rqstp->rq_respages[rqstp->rq_resused] =
+		rqstp->rq_argpages[rqstp->rq_arghi];
+	return rqstp->rq_respages[rqstp->rq_resused++];
+}
+
 static inline int svc_take_page(struct svc_rqst *rqstp)
 {
 	if (rqstp->rq_arghi <= rqstp->rq_argused)
-- 
cgit v1.2.3


From b7fa0554cf1ba6d6895cd0a5b02989a26e0bc704 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Add support for NFSv3 ACLs

 This adds acl support fo nfs clients via the NFSACL protocol extension, by
 implementing the getxattr, listxattr, setxattr, and removexattr iops for the
 system.posix_acl_access and system.posix_acl_default attributes.  This patch
 implements a dumb version that uses no caching (and thus adds some overhead).
 (Another patch in this patchset adds caching as well.)

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h    | 31 +++++++++++++++++++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_mount.h |  1 +
 include/linux/nfs_xdr.h   | 27 +++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d2b5d7e0e85a..3a5e442ac776 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -301,6 +301,9 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/
  * linux/fs/nfs/file.c
  */
 extern struct inode_operations nfs_file_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_file_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_file_operations;
 extern struct address_space_operations nfs_file_aops;
 
@@ -315,6 +318,22 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file)
 	return NULL;
 }
 
+/*
+ * linux/fs/nfs/xattr.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
+extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs3_setxattr(struct dentry *, const char *,
+			const void *, size_t, int);
+extern int nfs3_removexattr (struct dentry *, const char *name);
+#else
+# define nfs3_listxattr NULL
+# define nfs3_getxattr NULL
+# define nfs3_setxattr NULL
+# define nfs3_removexattr NULL
+#endif
+
 /*
  * linux/fs/nfs/direct.c
  */
@@ -329,6 +348,9 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
  * linux/fs/nfs/dir.c
  */
 extern struct inode_operations nfs_dir_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_dir_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_dir_operations;
 extern struct dentry_operations nfs_dentry_operations;
 
@@ -449,6 +471,15 @@ static inline void nfs_readdata_free(struct nfs_read_data *p)
 
 extern void  nfs_readdata_release(struct rpc_task *task);
 
+/*
+ * linux/fs/nfs3proc.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
+extern int nfs3_proc_setacl(struct inode *inode, int type,
+			    struct posix_acl *acl);
+#endif /* CONFIG_NFS_V3_ACL */
+
 /*
  * linux/fs/mount_clnt.c
  * (Used only by nfsroot module)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fc51645d61ee..3d3a305488cf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -10,6 +10,7 @@
 struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
+	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 0071428231f9..659c75438454 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -58,6 +58,7 @@ struct nfs_mount_data {
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
+#define NFS_MOUNT_NOACL		0x0800	/* 4 */
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
 #define NFS_MOUNT_FLAGMASK	0xFFFF
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 46b206b460c0..a2bf6914ff1b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -2,6 +2,7 @@
 #define _LINUX_NFS_XDR_H
 
 #include <linux/sunrpc/xprt.h>
+#include <linux/nfsacl.h>
 
 struct nfs4_fsid {
 	__u64 major;
@@ -368,6 +369,20 @@ struct nfs_readdirargs {
 	struct page **		pages;
 };
 
+struct nfs3_getaclargs {
+	struct nfs_fh *		fh;
+	int			mask;
+	struct page **		pages;
+};
+
+struct nfs3_setaclargs {
+	struct inode *		inode;
+	int			mask;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+	struct page **		pages;
+};
+
 struct nfs_diropok {
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
@@ -491,6 +506,15 @@ struct nfs3_readdirres {
 	int			plus;
 };
 
+struct nfs3_getaclres {
+	struct nfs_fattr *	fattr;
+	int			mask;
+	unsigned int		acl_access_count;
+	unsigned int		acl_default_count;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+};
+
 #ifdef CONFIG_NFS_V4
 
 typedef u64 clientid4;
@@ -748,4 +772,7 @@ extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version4;
 
+extern struct rpc_version	nfsacl_version3;
+extern struct rpc_program	nfsacl_program;
+
 #endif
-- 
cgit v1.2.3


From 055ffbea0596942579b0dae71d5dab78de8135f6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Fix handling of the umask when an NFSv3 default acl is
 present.

 NFSv3 has no concept of a umask on the server side: The client applies
 the umask locally, and sends the effective permissions to the server.
 This behavior is wrong when files are created in a directory that has a
 default ACL.  In this case, the umask is supposed to be ignored, and
 only the default ACL determines the file's effective permissions.

 Usually its the server's task to conditionally apply the umask.  But
 since the server knows nothing about the umask, we have to do it on the
 client side.  This patch tries to fetch the parent directory's default
 ACL before creating a new file, computes the appropriate create mode to
 send to the server, and finally sets the new file's access and default
 acl appropriately.

 Many thanks to Buck Huppmann <buchk@pobox.com> for sending the initial
 version of this patch, as well as for arguing why we need this change.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 3a5e442ac776..7662c5131b47 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -478,6 +478,15 @@ extern void  nfs_readdata_release(struct rpc_task *task);
 extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
 extern int nfs3_proc_setacl(struct inode *inode, int type,
 			    struct posix_acl *acl);
+extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode);
+#else
+static inline int nfs3_proc_set_default_acl(struct inode *dir,
+					    struct inode *inode,
+					    mode_t mode)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V3_ACL */
 
 /*
-- 
cgit v1.2.3


From 5c6a9f7d92291c832d47e792ed1fafa44acb066e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Cache the NFSv3 acls.

 Attach acls to inodes in the icache to avoid unnecessary GETACL RPC
 round-trips.  As long as the client doesn't retrieve any acls itself, only the
 default acls of exiting directories and the default and access acls of new
 directories will end up in the cache, which preserves some memory compared to
 always caching the access and default acl of all files.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7662c5131b47..4ceac9ddac93 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -91,6 +91,8 @@ struct nfs_open_context {
  */
 struct nfs_delegation;
 
+struct posix_acl;
+
 /*
  * nfs fs inode data in memory
  */
@@ -144,6 +146,10 @@ struct nfs_inode {
 	atomic_t		data_updates;
 
 	struct nfs_access_entry	cache_access;
+#ifdef CONFIG_NFS_V3_ACL
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+#endif
 
 	/*
 	 * This is the cookie verifier used for NFSv3 readdir
@@ -480,6 +486,7 @@ extern int nfs3_proc_setacl(struct inode *inode, int type,
 			    struct posix_acl *acl);
 extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
 		mode_t mode);
+extern void nfs3_forget_cached_acls(struct inode *inode);
 #else
 static inline int nfs3_proc_set_default_acl(struct inode *dir,
 					    struct inode *inode,
@@ -487,6 +494,10 @@ static inline int nfs3_proc_set_default_acl(struct inode *dir,
 {
 	return 0;
 }
+
+static inline void nfs3_forget_cached_acls(struct inode *inode)
+{
+}
 #endif /* CONFIG_NFS_V3_ACL */
 
 /*
-- 
cgit v1.2.3


From 00a926422765064cb28e218d4837411c88bf6a3e Mon Sep 17 00:00:00 2001
From: Olivier Galibert <galibert@pobox.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Hide NFS server-generated readdir cookies from userland

 NFSv3 currently returns the unsigned 64-bit cookie directly to
 userspace. The following patch causes the kernel to generate
 loff_t offsets for the benefit of userland.
 The current server-generated READDIR cookie is cached in the
 nfs_open_context instead of in filp->f_pos, so we still end up work
 correctly under directory insertions/deletion.

 Signed-off-by: Olivier Galibert <galibert@pobox.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4ceac9ddac93..f810195ef7ad 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -84,6 +84,9 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
+
+	int dir_pos;		/* Directory cookie cache */
+	__u64 dir_cookie;
 };
 
 /*
-- 
cgit v1.2.3


From f0dd2136da6d2070e12bfa6d199b136318e666c7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Clean up readdir changes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f810195ef7ad..c90313bfa435 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -85,7 +85,6 @@ struct nfs_open_context {
 
 	struct list_head list;
 
-	int dir_pos;		/* Directory cookie cache */
 	__u64 dir_cookie;
 };
 
-- 
cgit v1.2.3


From 951a143b3fcf15cfa9d38250b7462f821db241db Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Fix the file size revalidation

 Instead of looking at whether or not the file is open for writes before
 we accept to update the length using the server value, we should rather
 be looking at whether or not we are currently caching any writes.

 Failure to do so means in particular that we're not updating the file
 length correctly after obtaining a POSIX or BSD lock.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c90313bfa435..211266c56ce5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -294,7 +294,6 @@ extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
 extern void nfs_begin_data_update(struct inode *);
 extern void nfs_end_data_update(struct inode *);
-extern void nfs_end_data_update_defer(struct inode *);
 extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
-- 
cgit v1.2.3


From 7d52e86274e09fce8ac8f963e3605a84d0a305a7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Cleanup of caching code, and slight optimization of
 writes.

 Unless we're doing O_APPEND writes, we really don't care about revalidating
 the file length. Just make sure that we catch any page cache invalidations.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 211266c56ce5..443103c13e53 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -289,6 +289,7 @@ extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
-- 
cgit v1.2.3


From fe51beecc55d0b0dce289e4758e7c529a642f63e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Ensure that fstat() always returns the correct mtime

 Even if the file is open for writes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 443103c13e53..2954e44ed498 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -198,6 +198,7 @@ struct nfs_inode {
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
 #define NFS_INO_INVALID_ACL	0x0080		/* cached acls are invalid */
+#define NFS_INO_REVAL_PAGECACHE	0x1000		/* must revalidate pagecache */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
-- 
cgit v1.2.3


From c6a556b88adfacd2af90be84357c8165d716c27d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Make searching and waiting on busy writeback requests
 more efficient.

 Basically copies the VFS's method for tracking writebacks and applies
 it to the struct nfs_page.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 39e4895bcdb4..db40e4590ba2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -19,6 +19,11 @@
 
 #include <asm/atomic.h>
 
+/*
+ * Valid flags for the radix tree
+ */
+#define NFS_PAGE_TAG_WRITEBACK	1
+
 /*
  * Valid flags for a dirty buffer
  */
@@ -62,6 +67,9 @@ extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
 				  unsigned int);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
+extern  int nfs_set_page_writeback_locked(struct nfs_page *req);
+extern  void nfs_clear_page_writeback(struct nfs_page *req);
+
 
 /*
  * Lock the page of an asynchronous request without incrementing the wb_count
@@ -96,10 +104,6 @@ nfs_list_remove_request(struct nfs_page *req)
 {
 	if (list_empty(&req->wb_list))
 		return;
-	if (!NFS_WBACK_BUSY(req)) {
-		printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n");
-		BUG();
-	}
 	list_del_init(&req->wb_list);
 	req->wb_list_head = NULL;
 }
-- 
cgit v1.2.3


From 3da28eb1c6545fe73263a24eba0996217490e1eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NFS: Replace nfs_page insertion sort with a radix sort

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h   |  4 ++--
 include/linux/nfs_page.h | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2954e44ed498..8ea249110fb0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -395,10 +395,10 @@ extern void nfs_commit_done(struct rpc_task *);
  */
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, unsigned long, unsigned int, int);
+extern int  nfs_commit_inode(struct inode *, int);
 #else
 static inline int
-nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how)
+nfs_commit_inode(struct inode *inode, int how)
 {
 	return 0;
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index db40e4590ba2..da2e077b65e2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,6 +22,7 @@
 /*
  * Valid flags for the radix tree
  */
+#define NFS_PAGE_TAG_DIRTY	0
 #define NFS_PAGE_TAG_WRITEBACK	1
 
 /*
@@ -31,6 +32,7 @@
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
 
+struct nfs_inode;
 struct nfs_page {
 	struct list_head	wb_list,	/* Defines state of page: */
 				*wb_list_head;	/*      read/write/commit */
@@ -59,8 +61,8 @@ extern	void nfs_clear_request(struct nfs_page *req);
 extern	void nfs_release_request(struct nfs_page *req);
 
 
-extern	void nfs_list_add_request(struct nfs_page *, struct list_head *);
-
+extern  int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+				unsigned long idx_start, unsigned int npages);
 extern	int nfs_scan_list(struct list_head *, struct list_head *,
 			  unsigned long, unsigned int);
 extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
@@ -94,6 +96,18 @@ nfs_lock_request(struct nfs_page *req)
 	return 1;
 }
 
+/**
+ * nfs_list_add_request - Insert a request into a list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ */
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+	list_add_tail(&req->wb_list, head);
+	req->wb_list_head = head;
+}
+
 
 /**
  * nfs_list_remove_request - Remove a request from its wb_list
-- 
cgit v1.2.3


From ecdbf769b2cb8903e07cd482334c714d89fd1146 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NLM: fix a client-side race on blocking locks.

 If the lock blocks, the server may send us a GRANTED message that
 races with the reply to our LOCK request. Make sure that we catch
 the GRANTED by queueing up our request on the nlm_blocked list
 before we send off the first LOCK rpc call.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/lockd/lockd.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d9d22578212..16d4e5a08e1d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -72,6 +72,8 @@ struct nlm_lockowner {
 	uint32_t pid;
 };
 
+struct nlm_wait;
+
 /*
  * Memory chunk for NLM client RPC request.
  */
@@ -81,6 +83,7 @@ struct nlm_rqst {
 	struct nlm_host *	a_host;		/* host handle */
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_res		a_res;		/* result */
+	struct nlm_wait *	a_block;
 	char			a_owner[NLMCLNT_OHSIZE];
 };
 
@@ -142,7 +145,9 @@ extern unsigned long		nlmsvc_timeout;
  * Lockd client functions
  */
 struct nlm_rqst * nlmclnt_alloc_call(void);
-int		  nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *);
+int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
+void		  nlmclnt_finish_block(struct nlm_rqst *req);
+long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
 int		  nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 u32		  nlmclnt_grant(struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
-- 
cgit v1.2.3


From 8d0a8a9d0ec790086c64d210af413ac351d89e35 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:32 +0000
Subject: [PATCH] NFSv4: Clean up nfs4 lock state accounting

 Ensure that lock owner structures are not released prematurely.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/fs.h       | 1 +
 include/linux/nfs_fs_i.h | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9b8b696d4f15..e5a8db00df29 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -674,6 +674,7 @@ struct file_lock {
 	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 		struct nfs_lock_info	nfs_fl;
+		struct nfs4_lock_info	nfs4_fl;
 	} fl_u;
 };
 
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e9a749588a7b..e2c18dabff86 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -16,6 +16,11 @@ struct nfs_lock_info {
 	struct nlm_lockowner *owner;
 };
 
+struct nfs4_lock_state;
+struct nfs4_lock_info {
+	struct nfs4_lock_state *owner;
+};
+
 /*
  * Lock flag values
  */
-- 
cgit v1.2.3