From 7e05484f02e1ea05a3aae0724d4df1e8a5a1920f Mon Sep 17 00:00:00 2001
From: Bryan Venteicher <bryan.venteicher@gmail.com>
Date: Tue, 16 Oct 2012 23:55:54 +1030
Subject: virtio-scsi: Add real 2-clause BSD license to header

This is analogous to commit a1b383870a made by Rusty Russell to all
the VirtIO headers at the time. This eases the use of the header as
is by other OSes.

Signed-off-by: Bryan Venteicher <bryanv@daemoninthecloset.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_scsi.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index d6b4440387b7..4195b97a3def 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -1,7 +1,31 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
 #ifndef _LINUX_VIRTIO_SCSI_H
 #define _LINUX_VIRTIO_SCSI_H
-/* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
 
 #define VIRTIO_SCSI_CDB_SIZE   32
 #define VIRTIO_SCSI_SENSE_SIZE 96
-- 
cgit v1.2.3


From 1b4f59e356cc94929305bd107b7f38eec62715ad Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Mon, 22 Oct 2012 18:05:36 +0400
Subject: slub: Commonize slab_cache field in struct page

Right now, slab and slub have fields in struct page to derive which
cache a page belongs to, but they do it slightly differently.

slab uses a field called slab_cache, that lives in the third double
word. slub, uses a field called "slab", living outside of the
doublewords area.

Ideally, we could use the same field for this. Since slub heavily makes
use of the doubleword region, there isn't really much room to move
slub's slab_cache field around. Since slab does not have such strict
placement restrictions, we can move it outside the doubleword area.

The naming used by slab, "slab_cache", is less confusing, and it is
preferred over slub's generic "slab".

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
CC: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/mm_types.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..2fef4e720e79 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -128,10 +128,7 @@ struct page {
 		};
 
 		struct list_head list;	/* slobs list of pages */
-		struct {		/* slab fields */
-			struct kmem_cache *slab_cache;
-			struct slab *slab_page;
-		};
+		struct slab *slab_page; /* slab fields */
 	};
 
 	/* Remainder is not double word aligned */
@@ -146,7 +143,7 @@ struct page {
 #if USE_SPLIT_PTLOCKS
 		spinlock_t ptl;
 #endif
-		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
 	};
 
-- 
cgit v1.2.3


From 6c9c6d6301287e369a754d628230fa6e50cdb74b Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuah.khan@hp.com>
Date: Mon, 8 Oct 2012 11:08:06 -0600
Subject: dma-debug: New interfaces to debug dma mapping errors

Add dma-debug interface debug_dma_mapping_error() to debug
drivers that fail to check dma mapping errors on addresses
returned by dma_map_single() and dma_map_page() interfaces.
This interface clears a flag set by debug_dma_map_page() to
indicate that dma_mapping_error() has been called by the
driver. When driver does unmap, debug_dma_unmap() checks the
flag and if this flag is still set, prints warning message
that includes call trace that leads up to the unmap. This
interface can be called from dma_mapping_error() routines to
enable dma mapping error check debugging.

Tested: Intel iommu and swiotlb (iommu=soft) on x86-64 with
        CONFIG_DMA_API_DEBUG enabled and disabled.

Signed-off-by: Shuah Khan <shuah.khan@hp.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 171ad8aedc83..fc0e34ce038f 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       int direction, dma_addr_t dma_addr,
 			       bool map_single);
 
+extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
 extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 				 size_t size, int direction, bool map_single);
 
@@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page,
 {
 }
 
+static inline void debug_dma_mapping_error(struct device *dev,
+					  dma_addr_t dma_addr)
+{
+}
+
 static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 					size_t size, int direction,
 					bool map_single)
-- 
cgit v1.2.3


From 242860a47a75b933a79a30f6a40bf4858f4a3ecc Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Fri, 19 Oct 2012 09:33:12 -0300
Subject: mm/sl[aou]b: Move common kmem_cache_size() to slab.h

This function is identically defined in all three allocators
and it's trivial to move it to slab.h

Since now it's static, inline, header-defined function
this patch also drops the EXPORT_SYMBOL tag.

Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 83d1a1454b7e..743a10415122 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+	return s->object_size;
+}
+
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
-- 
cgit v1.2.3


From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Thu, 25 Oct 2012 23:34:42 -0500
Subject: rbd: get parent spec for version 2 images

Add support for getting the the information identifying the parent
image for rbd images that have them.  The child image holds a
reference to its parent image specification structure.  Create a new
entry "parent" in /sys/bus/rbd/image/N/ to report the identifying
information for the parent image, if any.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/rados.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 0a99099801a4..15077db662ed 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -87,6 +87,8 @@ struct ceph_pg {
  *
  *  lpgp_num -- as above.
  */
+#define CEPH_NOPOOL  ((__u64) (-1))  /* pool id not defined */
+
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
 #define CEPH_PG_POOL_VERSION 2
-- 
cgit v1.2.3


From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 30 Oct 2012 19:40:33 -0500
Subject: libceph: define ceph_pg_pool_name_by_id()

Define and export function ceph_pg_pool_name_by_id() to supply
the name of a pg pool whose id is given.  This will be used by
the next patch.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osdmap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e88a620b9f8a..5ea57ba69320 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
 
 #endif
-- 
cgit v1.2.3


From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon@gmail.com>
Date: Wed, 29 Aug 2012 10:10:10 -0400
Subject: exportfs: add FILEID_INVALID to indicate invalid fid_type

This commit adds FILEID_INVALID = 0xff in fid_type to
indicate invalid fid_type

It avoids using magic number 255

Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Vivek Trivedi <vtrivedi018@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/exportfs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..0e1452546300 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -83,6 +83,11 @@ enum fid_type {
 	 * 64 bit parent inode number.
 	 */
 	FILEID_NILFS_WITH_PARENT = 0x62,
+
+	/*
+	 * Filesystems must not use 0xff file ID.
+	 */
+	FILEID_INVALID = 0xff,
 };
 
 struct fid {
-- 
cgit v1.2.3


From 2be975c6d920de989ff5e4bc09ffe87e59d94662 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 3 Nov 2012 12:16:12 -0700
Subject: Input: introduce managed input devices (add devres support)

There is a demand from driver's writers to use managed devices framework
for their drivers. Unfortunately up to this moment input devices did not
provide support for managed devices and that lead to mixing two styles
of resource management which usually introduced more bugs, such as
manually unregistering input device but relying in devres to free
interrupt handler which (unless device is properly shut off) can cause
ISR to reference already freed memory.

This change introduces devm_input_allocate_device() that will allocate
managed instance of input device so that driver writers who prefer
using devm_* framework do not have to mix 2 styles.

Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index cab994ba6d91..5538cc09a4f5 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,8 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @devres_managed: indicates that devices is managed with devres framework
+ *	and needs not be explicitly unregistered or freed.
  */
 struct input_dev {
 	const char *name;
@@ -180,6 +182,8 @@ struct input_dev {
 	unsigned int num_vals;
 	unsigned int max_vals;
 	struct input_value *vals;
+
+	bool devres_managed;
 };
 #define to_input_dev(d) container_of(d, struct input_dev, dev)
 
@@ -323,7 +327,8 @@ struct input_handle {
 	struct list_head	h_node;
 };
 
-struct input_dev *input_allocate_device(void);
+struct input_dev __must_check *input_allocate_device(void);
+struct input_dev __must_check *devm_input_allocate_device(struct device *);
 void input_free_device(struct input_dev *dev);
 
 static inline struct input_dev *input_get_device(struct input_dev *dev)
-- 
cgit v1.2.3


From 800963fd598e2acbcd3a21a17e3ab3c185ad0d6a Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 10 Nov 2012 00:32:36 -0800
Subject: Input: document new members of struct input_dev

Fixes kernel-doc warnings for the members added in 3.7-rc1.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 5538cc09a4f5..82ce323b9986 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,9 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @num_vals: number of values queued in the current frame
+ * @max_vals: maximum number of values queued in a frame
+ * @vals: array of values queued in the current frame
  * @devres_managed: indicates that devices is managed with devres framework
  *	and needs not be explicitly unregistered or freed.
  */
-- 
cgit v1.2.3


From 49839dc93970789cea46f5171cd7f6ec11af64c7 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 6 Nov 2012 16:31:32 +0000
Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency
 constraints"

This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc
(ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints).
This commit causes I2C timeouts to appear on several OMAP3430/3530-based
boards:

  http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2
  http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2
  http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2

and appears to have been sent for merging before one of its prerequisites
was merged:

  http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-omap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index df804ba73e0b..92a0dc75bc74 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data {
 	u32		clkrate;
 	u32		rev;
 	u32		flags;
+	void		(*set_mpu_wkup_lat)(struct device *dev, long set);
 };
 
 #endif
-- 
cgit v1.2.3


From 2c88ab8c5af7d637d2a9d14b607fa6100fa64236 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D <shubhrajyoti@ti.com>
Date: Mon, 5 Nov 2012 17:53:39 +0530
Subject: ARM: i2c: omap: Remove the i207 errata flag

The commit [i2c: omap: use revision check for OMAP_I2C_FLAG_APPLY_ERRATA_I207]
uses the revision id instead of the flag. So the flag can be safely removed.

Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-omap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 92a0dc75bc74..1b25c04f82d9 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -21,7 +21,6 @@
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
 #define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
-#define OMAP_I2C_FLAG_APPLY_ERRATA_I207	BIT(4)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3


From 1cf3d8b3d24cd383ddfd5442c83ec5c355ffc2f7 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Tue, 2 Oct 2012 16:57:57 +0000
Subject: powerpc+of: Add of node/property notification chain for adds and
 removes

This patch moves the notification chain for updates to the device tree
from the powerpc/pseries code to the base OF code. This makes this
functionality available to all architectures.

Additionally the notification chain is updated to allow notifications
for property add/remove/update. To make this work a pointer to a new
struct (of_prop_reconfig) is passed to the routines in the notification chain.
The of_prop_reconfig property contains a pointer to the node containing the
property and a pointer to the property itself. In the case of property
updates, the property pointer refers to the new property.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 72843b72a2b2..fb5d87b66e3e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -22,6 +22,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/spinlock.h>
 #include <linux/topology.h>
+#include <linux/notifier.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -272,11 +273,24 @@ extern int prom_remove_property(struct device_node *np, struct property *prop);
 extern int prom_update_property(struct device_node *np,
 				struct property *newprop);
 
-#if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-#endif
+#define OF_RECONFIG_ATTACH_NODE		0x0001
+#define OF_RECONFIG_DETACH_NODE		0x0002
+#define OF_RECONFIG_ADD_PROPERTY	0x0003
+#define OF_RECONFIG_REMOVE_PROPERTY	0x0004
+#define OF_RECONFIG_UPDATE_PROPERTY	0x0005
+
+struct of_prop_reconfig {
+	struct device_node	*dn;
+	struct property		*prop;
+};
+
+extern int of_reconfig_notifier_register(struct notifier_block *);
+extern int of_reconfig_notifier_unregister(struct notifier_block *);
+extern int of_reconfig_notify(unsigned long, void *);
+
+extern int of_attach_node(struct device_node *);
+extern int of_detach_node(struct device_node *);
 
 #define of_match_ptr(_ptr)	(_ptr)
 
-- 
cgit v1.2.3


From 79d1c712958f94372482ad74578b00f44e744c12 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Tue, 2 Oct 2012 16:58:46 +0000
Subject: powerpc+of: Rename the drivers/of prom_* functions to of_*

Rename the prom_*_property routines of the generic OF code to of_*_property.
This brings them in line with the naming used by the rest of the OF code.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Geoff Levand <geoff@infradead.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index fb5d87b66e3e..a093b2fe5dfb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -268,10 +268,9 @@ extern int of_alias_get_id(struct device_node *np, const char *stem);
 
 extern int of_machine_is_compatible(const char *compat);
 
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop);
+extern int of_add_property(struct device_node *np, struct property *prop);
+extern int of_remove_property(struct device_node *np, struct property *prop);
+extern int of_update_property(struct device_node *np, struct property *newprop);
 
 /* For updating the device tree at runtime */
 #define OF_RECONFIG_ATTACH_NODE		0x0001
-- 
cgit v1.2.3


From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 14 Nov 2012 10:48:05 -0500
Subject: svcrpc: Revert "sunrpc/cache.h: replace simple_strtoul"

Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace
simple_strtoul" introduced new range-checking which could cause get_int
to fail on unsigned integers too large to be represented as an int.

We could parse them as unsigned instead--but it turns out svcgssd is
actually passing down "-1" in some cases.  Which is perhaps stupid, but
there's nothing we can do about it now.

So just revert back to the previous "sloppy" behavior that accepts
either representation.

Cc: stable@vger.kernel.org
Reported-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f792794f6634..5dc9ee4d616e 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize);
 static inline int get_int(char **bpp, int *anint)
 {
 	char buf[50];
+	char *ep;
+	int rv;
 	int len = qword_get(bpp, buf, sizeof(buf));
 
 	if (len < 0)
@@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint)
 	if (len == 0)
 		return -ENOENT;
 
-	if (kstrtoint(buf, 0, anint))
+	rv = simple_strtol(buf, &ep, 0);
+	if (*ep)
 		return -EINVAL;
 
+	*anint = rv;
 	return 0;
 }
 
-- 
cgit v1.2.3


From fc05d5a30dc19dd4c6d161e551719a8c597c7890 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 4 Oct 2012 09:28:49 +0200
Subject: mtd: delete nomadik_nand driver

The nomadik_nand driver is really just a subset of the FSMC
NAND driver, and there are no users anymore so let's delete
it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/platform_data/mtd-nomadik-nand.h | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 include/linux/platform_data/mtd-nomadik-nand.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h
deleted file mode 100644
index c3c8254c22a5..000000000000
--- a/include/linux/platform_data/mtd-nomadik-nand.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_ARCH_NAND_H
-#define __ASM_ARCH_NAND_H
-
-struct nomadik_nand_platform_data {
-	struct mtd_partition *parts;
-	int nparts;
-	int options;
-	int (*init) (void);
-	int (*exit) (void);
-};
-
-#define NAND_IO_DATA	0x40000000
-#define NAND_IO_CMD	0x40800000
-#define NAND_IO_ADDR	0x41000000
-
-#endif				/* __ASM_ARCH_NAND_H */
-- 
cgit v1.2.3


From 6d7b42a447f92eb3e7e410bbf62042693eb040f7 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Thu, 4 Oct 2012 15:14:16 +0200
Subject: mtd: fsmc_nand: pass the ale and cmd resource via resource

Do not use the platform_data to pass resource and be smart in the drivers.
Just pass it via resource

Switch to devm_request_and_ioremap at the sametime

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-By: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/fsmc.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index b20029221fb1..d6ed61ef451d 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -155,9 +155,6 @@ struct fsmc_nand_platform_data {
 	unsigned int		width;
 	unsigned int		bank;
 
-	/* CLE, ALE offsets */
-	unsigned int		cle_off;
-	unsigned int		ale_off;
 	enum access_mode	mode;
 
 	void			(*select_bank)(uint32_t bank, uint32_t busw);
-- 
cgit v1.2.3


From 2f25ae97fe4b424d88d765797c46456c7c0f1bae Mon Sep 17 00:00:00 2001
From: Vipin Kumar <vipin.kumar@st.com>
Date: Tue, 9 Oct 2012 16:14:53 +0530
Subject: mtd: nand: Increase the ecc placement locations to 640

Few devices like H27UBG8T2CTR have a writesize/oobsize of 8KB/640B.
This means that the maximum oobsize has gone up to 640 bytes and consequently
the maximum ecc placement locations have also gone up to 640.

Signed-off-by: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/mtd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 81d61e704599..f9ac2897b86b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -98,7 +98,7 @@ struct mtd_oob_ops {
 };
 
 #define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
-#define MTD_MAX_ECCPOS_ENTRIES_LARGE	448
+#define MTD_MAX_ECCPOS_ENTRIES_LARGE	640
 /*
  * Internal ECC layout control structure. For historical reasons, there is a
  * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained
-- 
cgit v1.2.3


From 5de0b52ea8f8f5149502867acff2efb5efaf1fc2 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Sat, 13 Oct 2012 13:03:29 -0400
Subject: mtd: gpmi: remove unneccessary header

The whole gpmi-nand driver has turned to pure devicetree supported.
So the linux/mtd/gpmi-nand.h is not neccessary now. Just remove it,
and move some macros to the gpmi-nand driver itself.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/gpmi-nand.h | 68 -------------------------------------------
 1 file changed, 68 deletions(-)
 delete mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
deleted file mode 100644
index ed3c4e09f3d1..000000000000
--- a/include/linux/mtd/gpmi-nand.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef __MACH_MXS_GPMI_NAND_H__
-#define __MACH_MXS_GPMI_NAND_H__
-
-/* The size of the resources is fixed. */
-#define GPMI_NAND_RES_SIZE	6
-
-/* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
-#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
-#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
-
-/**
- * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
- *
- * This structure communicates platform-specific information to the GPMI NAND
- * driver that can't be expressed as resources.
- *
- * @platform_init:           A pointer to a function the driver will call to
- *                           initialize the platform (e.g., set up the pin mux).
- * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_chip_count:          The maximum number of chips for which the driver
- *                           should configure the hardware. This value most
- *                           likely reflects the number of pins that are
- *                           connected to a NAND Flash device. If this is
- *                           greater than the SoC hardware can support, the
- *                           driver will print a message and fail to initialize.
- * @partitions:              An optional pointer to an array of partition
- *                           descriptions.
- * @partition_count:         The number of elements in the partitions array.
- */
-struct gpmi_nand_platform_data {
-	/* SoC hardware information. */
-	int		(*platform_init)(void);
-
-	/* NAND Flash information. */
-	unsigned int	min_prop_delay_in_ns;
-	unsigned int	max_prop_delay_in_ns;
-	unsigned int	max_chip_count;
-
-	/* Medium information. */
-	struct		mtd_partition *partitions;
-	unsigned	partition_count;
-};
-#endif
-- 
cgit v1.2.3


From e8a9d8f31c592eea89f1b0d3fd425e7a96944e88 Mon Sep 17 00:00:00 2001
From: Bastian Hecht <hechtb@googlemail.com>
Date: Fri, 19 Oct 2012 12:15:34 +0200
Subject: mtd: sh_flctl: Minor cleanups

Some small fixes to avoid sparse and smatch complain. Other cosmetic fixes
as well.

- Change of the type of the member index in struct sh_flctl from signed
to unsigned. We use index by addressing array members, so unsigned is more
concise here. Adapt functions relying on sh_flctl::index.
- Remove a blurring cast in write_fiforeg().
- Apply consistent naming scheme when refering to the data buffer.
- Shorten some unnecessarily verbose functions.
- Remove spaces at start of lines.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/sh_flctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 01e4b15b280e..481557688d05 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -147,7 +147,7 @@ struct sh_flctl {
 
 	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
 	int	read_bytes;
-	int	index;
+	unsigned int index;
 	int	seqin_column;		/* column in SEQIN cmd */
 	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
 	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
-- 
cgit v1.2.3


From 83738d87e3a0a4096e1419a65b8228130d183df6 Mon Sep 17 00:00:00 2001
From: Bastian Hecht <hechtb@googlemail.com>
Date: Fri, 19 Oct 2012 12:15:35 +0200
Subject: mtd: sh_flctl: Add DMA capabilty

The code probes if DMA channels can get allocated and tears them down at
removal/failure if needed.
If available it uses them to transfer the data part (not ECC). On
failure we fall back to PIO mode.

Based on Guennadi Liakhovetski's code from the sh_mmcif driver.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Reviewed-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/sh_flctl.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 481557688d05..1c28f8879b1c 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -20,6 +20,7 @@
 #ifndef __SH_FLCTL_H__
 #define __SH_FLCTL_H__
 
+#include <linux/completion.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
@@ -107,6 +108,7 @@
 #define ESTERINTE	(0x1 << 24)	/* ECC error interrupt enable */
 #define AC1CLR		(0x1 << 19)	/* ECC FIFO clear */
 #define AC0CLR		(0x1 << 18)	/* Data FIFO clear */
+#define DREQ0EN		(0x1 << 16)	/* FLDTFIFODMA Request Enable */
 #define ECERB		(0x1 << 9)	/* ECC error */
 #define STERB		(0x1 << 8)	/* Status error */
 #define STERINTE	(0x1 << 4)	/* Status error enable */
@@ -138,6 +140,8 @@ enum flctl_ecc_res_t {
 	FL_TIMEOUT
 };
 
+struct dma_chan;
+
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
@@ -161,6 +165,11 @@ struct sh_flctl {
 	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
 	unsigned holden:1;	/* Hardware has FLHOLDCR and HOLDEN is set */
 	unsigned qos_request:1;	/* QoS request to prevent deep power shutdown */
+
+	/* DMA related objects */
+	struct dma_chan		*chan_fifo0_rx;
+	struct dma_chan		*chan_fifo0_tx;
+	struct completion	dma_complete;
 };
 
 struct sh_flctl_platform_data {
@@ -170,6 +179,9 @@ struct sh_flctl_platform_data {
 
 	unsigned has_hwecc:1;
 	unsigned use_holden:1;
+
+	unsigned int            slave_id_fifo0_tx;
+	unsigned int            slave_id_fifo0_rx;
 };
 
 static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
-- 
cgit v1.2.3


From 9ef525a9141b14d23613faad303cf48a20814f1b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 25 Oct 2012 09:43:10 -0400
Subject: mtd: Fix kernel-doc content to avoid warning.

Add missing colons to fix kernel-doc generation warnings.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/nand.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 24e915957e4f..9d8a6048aacd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -471,8 +471,8 @@ struct nand_buffers {
  *			non 0 if ONFI supported.
  * @onfi_params:	[INTERN] holds the ONFI page parameter when ONFI is
  *			supported, 0 otherwise.
- * @onfi_set_features	[REPLACEABLE] set the features for ONFI nand
- * @onfi_get_features	[REPLACEABLE] get the features for ONFI nand
+ * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
+ * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
  * @ecclayout:		[REPLACEABLE] the default ECC placement scheme
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
-- 
cgit v1.2.3


From 3e9ce49e0ef95e22790a74720f0068696b2477c9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 29 Oct 2012 22:47:26 +0530
Subject: mtd: map: Fix compilation warning

This patch is an attempt to fix following compilation warning.

In file included from drivers/mtd/chips/cfi_cmdset_0001.c:35:0:
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wmaybe-uninitialized]

I could have used uninitialized_var() too, but didn't used it as the final else
part of map_word_load() is missing. So there is a chance that it might be passed
uninitialized. Better initialize to zero.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3595a0236b0f..56c7936e0c65 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -328,7 +328,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word
 
 static inline map_word map_word_load(struct map_info *map, const void *ptr)
 {
-	map_word r;
+	map_word r = {{0} };
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = *(unsigned char *)ptr;
-- 
cgit v1.2.3


From ebd5ac165f2aaefb767c53112c2010b0ff3df688 Mon Sep 17 00:00:00 2001
From: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Date: Wed, 24 Oct 2012 19:58:10 +0900
Subject: i2c: i2c-sh_mobile: support I2C hardware block with a faster
 operating clock

On newer SH-/R-Mobile SoCs, a clock supply to the I2C hardware block,
which is used to generate the SCL clock output, is getting faster than
before, while on the other hand, the SCL clock control registers, ICCH
and ICCL, stay unchanged in 9-bit-wide (8+1).

On such silicons, the internal SCL clock counter gets incremented every
2 clocks of the operating clock.

This patch makes it configurable through platform data.

Signed-off-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c/i2c-sh_mobile.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
index beda7081aead..06e3089795fb 100644
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ b/include/linux/i2c/i2c-sh_mobile.h
@@ -5,6 +5,7 @@
 
 struct i2c_sh_mobile_platform_data {
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 };
 
 #endif /* __I2C_SH_MOBILE_H__ */
-- 
cgit v1.2.3


From d611d41b46c96195b9a168a21992782458826e07 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Nov 2012 22:55:27 +0100
Subject: mtd: diskonchip: use inline functions for DocRead/DocWrite

The diskonchip drivers traditionally use home-grown macros for
doing MMIO accesses, which cause a lot of warnings, at least
on ARM machines:

drivers/mtd/devices/doc2000.c: In function 'doc_write':
drivers/mtd/devices/doc2000.c:854:5: warning: value computed is not used [-Wunused-value]
drivers/mtd/devices/doc2000.c: In function 'doc_erase':
drivers/mtd/devices/doc2000.c:1123:5: warning: value computed is not used [-Wunused-value
drivers/mtd/nand/diskonchip.c: In function 'doc2000_read_byte':
drivers/mtd/nand/diskonchip.c:318:3: warning: value computed is not used [-Wunused-value]

A nicer solution is to use the architecture-defined I/O accessors.
Here, we use the __raw_readl/__raw_writel style, instead of the
proper readl/writel ones, in order to preserve the odd semantics
of the existing macros that have their own barrier implementation
and no byte swap. It would be nice to fix this properly and use
the correct accessors as well as make the word size independent
from the architecture, but I guess the hardware is obsolete
enough that we should better not mess the driver an more than
necessary.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/doc2000.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h
index 0f6fea73a1f6..407d1e556c39 100644
--- a/include/linux/mtd/doc2000.h
+++ b/include/linux/mtd/doc2000.h
@@ -92,12 +92,26 @@
  * Others use readb/writeb
  */
 #if defined(__arm__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0)
+static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg)
+{
+	return __raw_readl(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg)
+{
+	__raw_writel(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x8000
 #elif defined(__ppc__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0)
+static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg)
+{
+	return __raw_readw(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg)
+{
+	__raw_writew(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x4000
 #else
 #define ReadDOC_(adr, reg)      readb((void __iomem *)(adr) + (reg))
-- 
cgit v1.2.3


From 5d27aa5af04f58f3020de1c224dcf8a62151fd58 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Nov 2012 22:55:28 +0100
Subject: mtd: uninitialized variable warning in map.h

The map_word_load() function initializes exactly
as many words in the buffer as required, but gcc
cannot figure this out and gives a misleading
warning. Marking the local variable as
uninitialized_var shuts up that warning.

Without this patch, building acs5k_defconfig results in:

drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_panic_write':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 56c7936e0c65..f6eb4332ac92 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -391,7 +391,7 @@ static inline map_word map_word_ff(struct map_info *map)
 
 static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 {
-	map_word r;
+	map_word uninitialized_var(r);
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = __raw_readb(map->virt + ofs);
-- 
cgit v1.2.3


From 0857ba3c24c308f42a242fe8a1894772750230ce Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 18 Nov 2012 18:36:19 +0200
Subject: i2c: i2c-cbus-gpio: introduce driver

Add i2c driver to enable access to devices behind CBUS on Nokia Internet
Tablets.

The patch also adds CBUS I2C configuration for N8x0 which is one of the
users of this driver.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/platform_data/i2c-cbus-gpio.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/linux/platform_data/i2c-cbus-gpio.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h
new file mode 100644
index 000000000000..6faa992a9502
--- /dev/null
+++ b/include/linux/platform_data/i2c-cbus-gpio.h
@@ -0,0 +1,27 @@
+/*
+ * i2c-cbus-gpio.h - CBUS I2C platform_data definition
+ *
+ * Copyright (C) 2004-2009 Nokia Corporation
+ *
+ * Written by Felipe Balbi and Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+
+struct i2c_cbus_platform_data {
+	int dat_gpio;
+	int clk_gpio;
+	int sel_gpio;
+};
+
+#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */
-- 
cgit v1.2.3


From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 11:02:55 -0500
Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments and results

Don't store the target request and response sizes in the same
variables used to store the server's replies to those targets.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a9e76ee1adca..97c8f9191880 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -242,6 +242,9 @@ struct nfs4_session {
 	struct nfs4_channel_attrs	bc_attrs;
 	struct nfs4_slot_table		bc_slot_table;
 	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
 };
 
 #endif /* CONFIG_NFS_V4 */
-- 
cgit v1.2.3


From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:12:38 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time

Store the renewal time inside the session slot instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a73ea89789d1..9cb1c63a70c2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
 
@@ -200,7 +201,6 @@ struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
-	unsigned long		sr_renewal_time;
 	u32			sr_status_flags;
 };
 
-- 
cgit v1.2.3


From 22a8578fca5a47e643bb4f70c232d0ec84db9e4e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Sat, 10 Nov 2012 13:08:20 -0300
Subject: mtd: mtd_blkdevs: Replace request handler kthread with a workqueue

By replacing a kthread with a workqueue, the code is now a bit clearer.
There's also a slight reduction of code size (numbers apply for x86):
Before:
   text	   data	    bss	    dec	    hex	filename
   3248	     36	      0	   3284	    cd4	drivers/mtd/mtd_blkdevs.o

After:
   text	   data	    bss	    dec	    hex	filename
   3150	     36	      0	   3186	    c72	drivers/mtd/mtd_blkdevs.o

Due to lack of real hardware, tests have been performed on an emulated
environment with mtdswap and mtdblock over nandsim devices.
Some real testing should be done, before merging this patch.

Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/blktrans.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index ed270bd2e4df..4eb0a50d0c55 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
+#include <linux/workqueue.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -43,7 +44,8 @@ struct mtd_blktrans_dev {
 	struct kref ref;
 	struct gendisk *disk;
 	struct attribute_group *disk_attributes;
-	struct task_struct *thread;
+	struct workqueue_struct *wq;
+	struct work_struct work;
 	struct request_queue *rq;
 	spinlock_t queue_lock;
 	void *priv;
-- 
cgit v1.2.3


From 8d4b9e3182634d8b5afb5a144a8c6c24b187bcc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Mon, 12 Nov 2012 13:03:20 +0100
Subject: bcma: export PLL reading function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required by NAND flash driver for initializing wait counters.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/bcma/bcma.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4180eb78d575..4fb6bd7941d7 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -345,6 +345,7 @@ extern void bcma_core_set_clockmode(struct bcma_device *core,
 				    enum bcma_clkmode clkmode);
 extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status,
 			      bool on);
+extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset);
 #define BCMA_DMA_TRANSLATION_MASK	0xC0000000
 #define  BCMA_DMA_TRANSLATION_NONE	0x00000000
 #define  BCMA_DMA_TRANSLATION_DMA32_CMT	0x40000000 /* Client Mode Translation for 32-bit DMA */
-- 
cgit v1.2.3


From 83af24027b3df1af5c5a9aa9adcdcfeb3429d3be Mon Sep 17 00:00:00 2001
From: "Philip, Avinash" <avinashphilip@ti.com>
Date: Wed, 21 Nov 2012 13:10:44 +0530
Subject: pwm: Device tree support for PWM polarity

Add support for encoding PWM properties in bit encoded form with
of_pwm_xlate_with_flags() function support. Platforms require platform
specific PWM properties has to populate in 3rd cell of the pwm-specifier
and PWM driver should also set .of_xlate support with this function.
Currently PWM property polarity encoded in bit position 0 of the third
cell in pwm-specifier.

Signed-off-by: Philip, Avinash <avinashphilip@ti.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 include/linux/pwm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 112b31436848..6d661f32e0e4 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -171,6 +171,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 					 unsigned int index,
 					 const char *label);
 
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+		const struct of_phandle_args *args);
+
 struct pwm_device *pwm_get(struct device *dev, const char *consumer);
 void pwm_put(struct pwm_device *pwm);
 
-- 
cgit v1.2.3


From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:25:01 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session
 pointer

Move the session pointer into the slot table, then have struct nfs4_slot
point to that slot table.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 include/linux/nfs_xdr.h   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 97c8f9191880..b0412873d29c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -209,6 +209,7 @@ struct nfs_server {
 /* Sessions */
 #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
 struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
 	struct nfs4_slot *slots;		/* seqid per slot */
 	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
 	spinlock_t	slot_tbl_lock;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9cb1c63a70c2..0fd88ab0e814 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
@@ -198,7 +199,6 @@ struct nfs4_sequence_args {
 };
 
 struct nfs4_sequence_res {
-	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
-- 
cgit v1.2.3


From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:45:06 -0500
Subject: NFSv4.1: Label each entry in the session slot tables with its slot
 number

Instead of doing slot table pointer gymnastics every time we want to
know which slot we're using.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0fd88ab0e814..9c9b76c94b46 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -189,6 +189,7 @@ struct nfs4_channel_attrs {
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
+	u32			slot_nr;
 	u32		 	seq_nr;
 };
 
-- 
cgit v1.2.3


From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:58:36 -0500
Subject: NFSv4.1: Simplify struct nfs4_sequence_args too

Replace the session pointer + slotid with a pointer to the
allocated slot.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9c9b76c94b46..deb31bbbb857 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,8 +194,7 @@ struct nfs4_slot {
 };
 
 struct nfs4_sequence_args {
-	struct nfs4_session	*sa_session;
-	u32			sa_slotid;
+	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
 };
 
-- 
cgit v1.2.3


From 31fbcda71489d8cbe2b82819eaab4818524e3a49 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 Sep 2012 10:29:07 +0100
Subject: Input: bu21013_ts - move GPIO init and exit functions into the driver

These GPIO init and exit functions have no place in platform data, they
should be part of the driver instead,

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/bu21013.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index 05e03284b92a..6230d76bde5d 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -9,13 +9,10 @@
 
 /**
  * struct bu21013_platform_device - Handle the platform data
- * @cs_en:	pointer to the cs enable function
- * @cs_dis:	pointer to the cs disable function
- * @irq_read_val:    pointer to read the pen irq value function
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
  * @cs_pin: chip select pin
- * @irq: irq pin
+ * @touch_pin: touch gpio pin
  * @ext_clk: external clock flag
  * @x_flip: x flip flag
  * @y_flip: y flip flag
@@ -24,13 +21,10 @@
  * This is used to handle the platform data
  */
 struct bu21013_platform_device {
-	int (*cs_en)(int reset_pin);
-	int (*cs_dis)(int reset_pin);
-	int (*irq_read_val)(void);
 	int touch_x_max;
 	int touch_y_max;
 	unsigned int cs_pin;
-	unsigned int irq;
+	unsigned int touch_pin;
 	bool ext_clk;
 	bool x_flip;
 	bool y_flip;
-- 
cgit v1.2.3


From 972deb4f49b5b6703d9c6117ba0aeda2180d4447 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D <shubhrajyoti@ti.com>
Date: Mon, 26 Nov 2012 15:25:11 +0530
Subject: i2c: omap: Remove the OMAP_I2C_FLAG_RESET_REGS_POSTIDLE flag

The OMAP_I2C_FLAG_RESET_REGS_POSTIDLE is not used anymore
in the i2c driver. Remove the flag.

Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-omap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 1b25c04f82d9..babe0cf6d56b 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -20,7 +20,6 @@
 #define OMAP_I2C_FLAG_NO_FIFO			BIT(0)
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
-#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3


From 64b37b2a63eb2f80b65c7185f0013f8ffc637ae3 Mon Sep 17 00:00:00 2001
From: Matthieu CASTET <matthieu.castet@parrot.com>
Date: Tue, 6 Nov 2012 11:51:44 +0100
Subject: mtd: nand: add NAND_BUSWIDTH_AUTO to autodetect bus width

The driver call nand_scan_ident in 8 bit mode, then
readid or onfi detection are done (and detect bus width).
The driver should update its bus width before calling nand_scan_tail.

This work because readid and onfi are read work 8 byte mode.

Note that nand_scan_ident send command (NAND_CMD_RESET, NAND_CMD_READID, NAND_CMD_PARAM), address and read data
The ONFI specificication is not very clear for x16 device if high byte of address should be driven to 0,
but according to [1] it should be ok to not drive it during autodetection.

[1]
3.3.2. Target Initialization

[...]
The Read ID and Read Parameter Page commands only use the lower 8-bits of the data bus.
The host shall not issue commands that use a word data width on x16 devices until the host
determines the device supports a 16-bit data bus width in the parameter page.

Signed-off-by: Matthieu CASTET <matthieu.castet@parrot.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/nand.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9d8a6048aacd..7ccb3c59ed60 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -219,6 +219,13 @@ typedef enum {
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * Autodetect nand buswidth with readid/onfi.
+ * This suppose the driver will configure the hardware in 8 bits mode
+ * when calling nand_scan_ident, and update its configuration
+ * before calling nand_scan_tail.
+ */
+#define NAND_BUSWIDTH_AUTO      0x00080000
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
-- 
cgit v1.2.3


From 72b15b6ae97796c5fac687addde5dbfab872cf94 Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna <omar.luna@linaro.org>
Date: Mon, 19 Nov 2012 19:05:50 -0600
Subject: iommu/omap: Migrate to hwmod framework

Use hwmod data and device attributes to build and register an
omap device for iommu driver.

 - Update the naming convention in isp module.
 - Remove unneeded check for number of resources, as this is now
   handled by omap_device and prevents driver from loading.
 - Now unused, remove platform device and resource data, handling
   of sysconfig register for softreset purposes, use default
   latency structure.
 - Use hwmod API for reset handling.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 include/linux/platform_data/iommu-omap.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index c677b9f2fefa..ef2060d7eeb8 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/platform_device.h>
+
 #define MMU_REG_SIZE		256
 
 /**
@@ -43,7 +45,11 @@ struct omap_mmu_dev_attr {
 struct iommu_platform_data {
 	const char *name;
 	const char *clk_name;
-	const int nr_tlb_entries;
+	const char *reset_name;
+	int nr_tlb_entries;
 	u32 da_start;
 	u32 da_end;
+
+	int (*assert_reset)(struct platform_device *pdev, const char *name);
+	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 };
-- 
cgit v1.2.3


From ebf7cda0f92effd8169b831fae81e9437dce1fef Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna <omar.luna@linaro.org>
Date: Mon, 19 Nov 2012 19:05:51 -0600
Subject: iommu/omap: Adapt to runtime pm

Use runtime PM functionality interfaced with hwmod enable/idle
functions, to replace direct clock operations and sysconfig
handling.

Due to reset sequence, pm_runtime_[get|put]_sync must be used, to
avoid possible operations with the module under reset. Because of
this and given that the driver uses spin_locks to protect their
critical sections, we must use pm_runtime_irq_safe in order for the
runtime ops to be happy, otherwise might_sleep_if checks in runtime
framework will complain.

The remaining pm_runtime out of iommu_enable and iommu_disable
corresponds to paths that can be accessed through debugfs, some of
them doesn't work if the module is not enabled first, but in future
if the mmu is idled withouth freeing, these are needed to debug.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 include/linux/platform_data/iommu-omap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index ef2060d7eeb8..5b429c43a297 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -44,7 +44,6 @@ struct omap_mmu_dev_attr {
 
 struct iommu_platform_data {
 	const char *name;
-	const char *clk_name;
 	const char *reset_name;
 	int nr_tlb_entries;
 	u32 da_start;
-- 
cgit v1.2.3


From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:11:13 -0500
Subject: svcrpc: don't byte-swap sk_reclen in place

Byte-swapping in place is always a little dubious.

Let's instead define this field to always be big-endian, and do the
swapping on demand where we need it.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92ad02f0dcc0..613cf42227aa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,11 +26,21 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	u32			sk_reclen;	/* length of record */
+	__be32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
+static inline u32 svc_sock_reclen(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+}
+
+static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+}
+
 /*
  * Function prototypes.
  */
-- 
cgit v1.2.3


From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:45:35 -0500
Subject: svcrpc: track rpc data length separately from sk_tcplen

Keep a separate field, sk_datalen, that tracks only the data contained
in a fragment, not including the fragment header.

For now, this is always just max(0, sk_tcplen - 4), but after we allow
multiple fragments sk_datalen will accumulate the total rpc data size
while sk_tcplen only tracks progress receiving the current fragment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 613cf42227aa..62fd1b756e99 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,8 +26,15 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	__be32			sk_reclen;	/* length of record */
-	u32			sk_tcplen;	/* current read length */
+	/* On-the-wire fragment header: */
+	__be32			sk_reclen;
+	/* As we receive a record, this includes the length received so
+	 * far (including the fragment header): */
+	u32			sk_tcplen;
+	/* Total length of the data (not including fragment headers)
+	 * received so far in the fragments making up this rpc: */
+	u32			sk_datalen;
+
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
-- 
cgit v1.2.3


From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 12:49:27 -0500
Subject: NFSv4.1: Ensure that the client tracks the server
 target_highest_slotid

Dynamic slot allocation in NFSv4.1 depends on the client being able to
track the server's target value for the highest slotid in the
slot table.  See the reference in Section 2.10.6.1 of RFC5661.

To avoid ordering problems in the case where 2 SEQUENCE replies contain
conflicting updates to this target value, we also introduce a generation
counter, to track whether or not an RPC containing a SEQUENCE operation
was launched before or after the last update.

Also rename the nfs4_slot_table target_max_slots field to
'target_highest_slotid' to avoid confusion with a slot
table size or number of slots.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 5 +++--
 include/linux/nfs_xdr.h   | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b0412873d29c..57d406997def 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -217,8 +217,9 @@ struct nfs4_slot_table {
 	u32		max_slots;		/* # slots in table */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
-	u32		target_max_slots;	/* Set by CB_RECALL_SLOT as
-						 * the new max_slots */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
 	struct completion complete;
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index deb31bbbb857..08c47db7417f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
 	u32		 	seq_nr;
@@ -202,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_target_highest_slotid;
 };
 
 struct nfs4_get_lease_time_args {
-- 
cgit v1.2.3


From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 18:10:30 -0500
Subject: NFSv4.1: Reset the sequence number for slots that have been
 deallocated

When the server tells us that it is dynamically resizing the session
replay cache, we should reset the sequence number for those slots
that have been deallocated.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 include/linux/nfs_xdr.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 57d406997def..646e64bbff4c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -218,6 +218,7 @@ struct nfs4_slot_table {
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
 	unsigned long	generation;		/* Generation counter for
 						   target_highest_slotid */
 	struct completion complete;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 08c47db7417f..3ddb08fba935 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -203,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_highest_slotid;
 	u32			sr_target_highest_slotid;
 };
 
-- 
cgit v1.2.3


From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 14:45:48 -0500
Subject: NFSv4.1: Support dynamic resizing of the session slot table

Allow the server to control the size of the session slot table
by adjusting the value of sr_target_max_slots in the reply to the
SEQUENCE operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 646e64bbff4c..30715508fade 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -215,6 +215,7 @@ struct nfs4_slot_table {
 	spinlock_t	slot_tbl_lock;
 	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
 	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
-- 
cgit v1.2.3


From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 19:49:20 -0500
Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly

Instead of an array of slots, use a singly linked list of slots that
can be dynamically appended to or shrunk.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3ddb08fba935..44d256f6021c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
 	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
-- 
cgit v1.2.3


From c34309a45ea491e5f0c0d0af49ccfa018ff35fc1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 26 Nov 2012 14:33:03 -0500
Subject: NFS: Remove unused function slot_idx

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 30715508fade..e707c1b69796 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -225,11 +225,6 @@ struct nfs4_slot_table {
 	struct completion complete;
 };
 
-static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
-{
-	return sp - tbl->slots;
-}
-
 /*
  * Session related parameters
  */
-- 
cgit v1.2.3


From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 26 Nov 2012 14:20:49 -0500
Subject: NFSv4.1: Move slot table and session struct definitions to
 nfs4session.h

Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 49 -----------------------------------------------
 include/linux/nfs_xdr.h   | 11 +----------
 2 files changed, 1 insertion(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e707c1b69796..6c6ed153a9b4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -198,53 +198,4 @@ struct nfs_server {
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 
-
-/* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
-#define NFS4_MAX_SLOT_TABLE (256U)
-#define NFS4_NO_SLOT ((u32)-1)
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-
-/* Sessions */
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
-struct nfs4_slot_table {
-	struct nfs4_session *session;		/* Parent session */
-	struct nfs4_slot *slots;		/* seqid per slot */
-	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
-	spinlock_t	slot_tbl_lock;
-	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
-	u32		max_slots;		/* # slots in table */
-	u32		max_slotid;		/* Max allowed slotid value */
-	u32		highest_used_slotid;	/* sent to server on each SEQ.
-						 * op for dynamic resizing */
-	u32		target_highest_slotid;	/* Server max_slot target */
-	u32		server_highest_slotid;	/* Server highest slotid */
-	unsigned long	generation;		/* Generation counter for
-						   target_highest_slotid */
-	struct completion complete;
-};
-
-/*
- * Session related parameters
- */
-struct nfs4_session {
-	struct nfs4_sessionid		sess_id;
-	u32				flags;
-	unsigned long			session_state;
-	u32				hash_alg;
-	u32				ssv_len;
-
-	/* The fore and back channel */
-	struct nfs4_channel_attrs	fc_attrs;
-	struct nfs4_slot_table		fc_slot_table;
-	struct nfs4_channel_attrs	bc_attrs;
-	struct nfs4_slot_table		bc_slot_table;
-	struct nfs_client		*clp;
-	/* Create session arguments */
-	unsigned int			fc_target_max_rqst_sz;
-	unsigned int			fc_target_max_resp_sz;
-};
-
-#endif /* CONFIG_NFS_V4 */
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 44d256f6021c..2076149db1a4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -185,16 +185,7 @@ struct nfs4_channel_attrs {
 	u32			max_reqs;
 };
 
-/* nfs41 sessions slot seqid */
-struct nfs4_slot {
-	struct nfs4_slot_table	*table;
-	struct nfs4_slot	*next;
-	unsigned long		generation;
-	unsigned long		renewal_time;
-	u32			slot_nr;
-	u32		 	seq_nr;
-};
-
+struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
-- 
cgit v1.2.3


From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 29 Oct 2012 19:02:20 -0400
Subject: NFSv4: Clean up handling of privileged operations

Privileged rpc calls are those that are run by the state recovery thread,
in cases where we're trying to recover the system after a server reboot
or a network partition. In those cases, we want to fence off all other
rpc calls (see nfs4_begin_drain_session()) so that they don't end up
using stateids or clientids that are in the process of being recovered.

Prior to this patch, we had to set up special callback functions in
order to declare an rpc call as being privileged.
By adding a new field to the sequence arguments, this patch simplifies
things considerably, and allows us to declare the rpc call as privileged
before it is run.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2076149db1a4..baa673edb597 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,7 +188,8 @@ struct nfs4_channel_attrs {
 struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
-	u8			sa_cache_this;
+	u8			sa_cache_this : 1,
+				sa_privileged : 1;
 };
 
 struct nfs4_sequence_res {
-- 
cgit v1.2.3


From 62ae082d883d167cdaa7895cf2972d85e178228a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 29 Nov 2012 17:10:01 -0500
Subject: NFSv4: Reorder the XDR structures to put sequence at the top, not
 bottom

Pre-condition for optimising the slot allocation and reintroducing FIFO
behaviour.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 138 ++++++++++++++++++++++++------------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index baa673edb597..a55abd499c21 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -205,8 +205,8 @@ struct nfs4_get_lease_time_args {
 };
 
 struct nfs4_get_lease_time_res {
-	struct nfs_fsinfo	       *lr_fsinfo;
 	struct nfs4_sequence_res	lr_seq_res;
+	struct nfs_fsinfo	       *lr_fsinfo;
 };
 
 #define PNFS_LAYOUT_MAXSIZE 4096
@@ -224,23 +224,23 @@ struct pnfs_layout_range {
 };
 
 struct nfs4_layoutget_args {
+	struct nfs4_sequence_args seq_args;
 	__u32 type;
 	struct pnfs_layout_range range;
 	__u64 minlength;
 	__u32 maxcount;
 	struct inode *inode;
 	struct nfs_open_context *ctx;
-	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	struct nfs4_layoutdriver_data layout;
 };
 
 struct nfs4_layoutget_res {
+	struct nfs4_sequence_res seq_res;
 	__u32 return_on_close;
 	struct pnfs_layout_range range;
 	__u32 type;
 	nfs4_stateid stateid;
-	struct nfs4_sequence_res seq_res;
 	struct nfs4_layoutdriver_data *layoutp;
 };
 
@@ -251,38 +251,38 @@ struct nfs4_layoutget {
 };
 
 struct nfs4_getdevicelist_args {
+	struct nfs4_sequence_args seq_args;
 	const struct nfs_fh *fh;
 	u32 layoutclass;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_getdevicelist_res {
-	struct pnfs_devicelist *devlist;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_devicelist *devlist;
 };
 
 struct nfs4_getdeviceinfo_args {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_args seq_args;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_getdeviceinfo_res {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_layoutcommit_args {
+	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	__u64 lastbytewritten;
 	struct inode *inode;
 	const u32 *bitmask;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutcommit_res {
+	struct nfs4_sequence_res seq_res;
 	struct nfs_fattr *fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res seq_res;
 	int status;
 };
 
@@ -296,11 +296,11 @@ struct nfs4_layoutcommit_data {
 };
 
 struct nfs4_layoutreturn_args {
+	struct nfs4_sequence_args seq_args;
 	struct pnfs_layout_hdr *layout;
 	struct inode *inode;
 	nfs4_stateid stateid;
 	__u32   layout_type;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutreturn_res {
@@ -326,6 +326,7 @@ struct stateowner_id {
  * Arguments to the open call.
  */
 struct nfs_openargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
@@ -346,10 +347,10 @@ struct nfs_openargs {
 	const u32 *		bitmask;
 	const u32 *		open_bitmap;
 	__u32			claim;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_openres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fh           fh;
 	struct nfs4_change_info	cinfo;
@@ -364,7 +365,6 @@ struct nfs_openres {
 	__u32			attrset[NFS4_BITMAP_SIZE];
 	struct nfs4_string	*owner;
 	struct nfs4_string	*group_owner;
-	struct nfs4_sequence_res	seq_res;
 	__u32			access_request;
 	__u32			access_supported;
 	__u32			access_result;
@@ -388,20 +388,20 @@ struct nfs_open_confirmres {
  * Arguments to the close call.
  */
 struct nfs_closeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_closeres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
@@ -413,6 +413,7 @@ struct nfs_lowner {
 };
 
 struct nfs_lock_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	lock_seqid;
@@ -423,40 +424,39 @@ struct nfs_lock_args {
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
 	unsigned char		new_lock_owner : 1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lock_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	lock_seqid;
 	struct nfs_seqid *	open_seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_locku_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_locku_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_lockt_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_lowner	lock_owner;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lockt_res {
-	struct file_lock *	denied; /* LOCK, LOCKT failed */
 	struct nfs4_sequence_res	seq_res;
+	struct file_lock *	denied; /* LOCK, LOCKT failed */
 };
 
 struct nfs_release_lockowner_args {
@@ -464,22 +464,23 @@ struct nfs_release_lockowner_args {
 };
 
 struct nfs4_delegreturnargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_delegreturnres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the read call.
  */
 struct nfs_readargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -487,20 +488,20 @@ struct nfs_readargs {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_readres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	int                     eof;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the write call.
  */
 struct nfs_writeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -510,7 +511,6 @@ struct nfs_writeargs {
 	unsigned int		pgbase;
 	struct page **		pages;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_write_verifier {
@@ -523,65 +523,65 @@ struct nfs_writeverf {
 };
 
 struct nfs_writeres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the commit call.
  */
 struct nfs_commitargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh		*fh;
 	__u64			offset;
 	__u32			count;
 	const u32		*bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_commitres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr	*fattr;
 	struct nfs_writeverf	*verf;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh	*fh;
 	struct qstr		name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_removeres {
+	struct nfs4_sequence_res 	seq_res;
 	const struct nfs_server *server;
 	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
-	struct nfs4_sequence_res 	seq_res;
 };
 
 /*
  * Common arguments to the rename call
  */
 struct nfs_renameargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*old_dir;
 	const struct nfs_fh		*new_dir;
 	const struct qstr		*old_name;
 	const struct qstr		*new_name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_renameres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server		*server;
 	struct nfs4_change_info		old_cinfo;
 	struct nfs_fattr		*old_fattr;
 	struct nfs4_change_info		new_cinfo;
 	struct nfs_fattr		*new_fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -622,20 +622,20 @@ struct nfs_createargs {
 };
 
 struct nfs_setattrargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *                 fh;
 	nfs4_stateid                    stateid;
 	struct iattr *                  iap;
 	const struct nfs_server *	server; /* Needed for name mapping */
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setaclargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_setaclres {
@@ -643,27 +643,27 @@ struct nfs_setaclres {
 };
 
 struct nfs_getaclargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 /* getxattr ACL interface flags */
 #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
+	struct nfs4_sequence_res	seq_res;
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
 	struct page *			acl_scratch;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_setattrres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_linkargs {
@@ -828,21 +828,22 @@ struct nfs3_getaclres {
 typedef u64 clientid4;
 
 struct nfs4_accessargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
 	u32				access;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_accessres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_create_arg {
+	struct nfs4_sequence_args 	seq_args;
 	u32				ftype;
 	union {
 		struct {
@@ -859,88 +860,88 @@ struct nfs4_create_arg {
 	const struct iattr *		attrs;
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_create_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_fsinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fsinfo_res {
-	struct nfs_fsinfo	       *fsinfo;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsinfo	       *fsinfo;
 };
 
 struct nfs4_getattr_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_link_arg {
+	struct nfs4_sequence_args 	seq_args;
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_link_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 
 struct nfs4_lookup_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_lookup_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_lookup_root_arg {
-	const u32 *			bitmask;
 	struct nfs4_sequence_args	seq_args;
+	const u32 *			bitmask;
 };
 
 struct nfs4_pathconf_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_pathconf_res {
-	struct nfs_pathconf	       *pathconf;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_pathconf	       *pathconf;
 };
 
 struct nfs4_readdir_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	u64				cookie;
 	nfs4_verifier			verifier;
@@ -949,21 +950,20 @@ struct nfs4_readdir_arg {
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
 	int				plus;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_verifier			verifier;
 	unsigned int			pgbase;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_readlink {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	unsigned int			pgbase;
 	unsigned int			pglen;   /* zero-copy data */
 	struct page **			pages;   /* zero-copy data */
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readlink_res {
@@ -989,28 +989,28 @@ struct nfs4_setclientid_res {
 };
 
 struct nfs4_statfs_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_statfs_res {
-	struct nfs_fsstat	       *fsstat;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsstat	       *fsstat;
 };
 
 struct nfs4_server_caps_arg {
-	struct nfs_fh		       *fhandle;
 	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh		       *fhandle;
 };
 
 struct nfs4_server_caps_res {
+	struct nfs4_sequence_res	seq_res;
 	u32				attr_bitmask[3];
 	u32				acl_bitmask;
 	u32				has_links;
 	u32				has_symlinks;
 	u32				fh_expire_type;
-	struct nfs4_sequence_res	seq_res;
 };
 
 #define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1036,16 +1036,16 @@ struct nfs4_fs_locations {
 };
 
 struct nfs4_fs_locations_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *dir_fh;
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fs_locations_res {
-	struct nfs4_fs_locations       *fs_locations;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_fs_locations       *fs_locations;
 };
 
 struct nfs4_secinfo_oid {
@@ -1070,14 +1070,14 @@ struct nfs4_secinfo_flavors {
 };
 
 struct nfs4_secinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*dir_fh;
 	const struct qstr		*name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_secinfo_res {
-	struct nfs4_secinfo_flavors	*flavors;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_secinfo_flavors	*flavors;
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -1157,9 +1157,9 @@ struct nfs41_create_session_res {
 };
 
 struct nfs41_reclaim_complete_args {
+	struct nfs4_sequence_args	seq_args;
 	/* In the future extend to include curr_fh for use with migration */
 	unsigned char			one_fs:1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs41_reclaim_complete_res {
@@ -1169,28 +1169,28 @@ struct nfs41_reclaim_complete_res {
 #define SECINFO_STYLE_CURRENT_FH 0
 #define SECINFO_STYLE_PARENT 1
 struct nfs41_secinfo_no_name_args {
-	int				style;
 	struct nfs4_sequence_args	seq_args;
+	int				style;
 };
 
 struct nfs41_test_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_test_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 struct nfs41_free_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_free_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 #else
-- 
cgit v1.2.3


From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 30 Nov 2012 23:59:29 -0500
Subject: SUNRPC: Don't allow low priority tasks to pre-empt higher priority
 ones

Currently, the priority queues attempt to be 'fair' to lower priority
tasks by scheduling them after a certain number of higher priority tasks
have run. The problem is that both the transport send queue and
the NFSv4.1 session slot queue have strong ordering requirements.

This patch therefore removes the fairness code in favour of strong
ordering of task priorities.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index dc0c3cc3ada3..b64f8eb0b973 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -192,7 +192,6 @@ struct rpc_wait_queue {
 	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
-	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
 	unsigned short		qlen;			/* total # tasks waiting in queue */
 	struct rpc_timer	timer_list;
-- 
cgit v1.2.3


From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 3 Dec 2012 16:25:40 +0000
Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping

Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16()
intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC,
48 for other platforms).

By using these instead of the inline assembler that most architectures
have in their __arch_swabXX() macros, we let the compiler see what's
actually happening. The resulting code should be at least as good, and
much *better* in the cases where it can be combined with a nearby load
or store, using a load-and-byteswap or store-and-byteswap instruction
(e.g. lwbrx/stwbrx on PowerPC, movbe on Atom).

When GCC is sufficiently recent *and* the architecture opts in to using
the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be
used in preference to the __arch_swabXX() macros. An architecture which
does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own
hand-crafted macros.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/compiler-gcc4.h  | 10 ++++++++++
 include/linux/compiler-intel.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..dc16a858e77c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -63,3 +63,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index d8e636e5607d..973ce10c40b6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,3 +29,10 @@
 #endif
 
 #define uninitialized_var(x) x
+
+#ifndef __HAVE_BUILTIN_BSWAP16__
+/* icc has this, but it's called _bswap16 */
+#define __HAVE_BUILTIN_BSWAP16__
+#define __builtin_bswap16 _bswap16
+#endif
+
-- 
cgit v1.2.3


From dd31866b0d55c9b70722ebad6ccd643223d9269e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Fri, 2 Nov 2012 17:06:26 +0900
Subject: f2fs: add on-disk layout

This adds a header file describing the on-disk layout of f2fs.

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Chul Lee <chur.lee@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 include/linux/f2fs_fs.h | 410 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 include/linux/f2fs_fs.h

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000000000000..1429ece7caab
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,410 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include <linux/pagemap.h>
+#include <linux/types.h>
+
+#define F2FS_SUPER_OFFSET		1024	/* byte-size offset */
+#define F2FS_LOG_SECTOR_SIZE		9	/* 9 bits for 512 byte */
+#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
+#define F2FS_BLKSIZE			4096	/* support only 4KB block */
+#define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+
+#define NULL_ADDR		0x0U
+#define NEW_ADDR		-1U
+
+#define F2FS_ROOT_INO(sbi)	(sbi->root_ino_num)
+#define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
+#define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
+
+/* This flag is used by node and meta inodes, and by recovery */
+#define GFP_F2FS_ZERO	(GFP_NOFS | __GFP_ZERO)
+
+/*
+ * For further optimization on multi-head logs, on-disk layout supports maximum
+ * 16 logs by default. The number, 16, is expected to cover all the cases
+ * enoughly. The implementaion currently uses no more than 6 logs.
+ * Half the logs are used for nodes, and the other half are used for data.
+ */
+#define MAX_ACTIVE_LOGS	16
+#define MAX_ACTIVE_NODE_LOGS	8
+#define MAX_ACTIVE_DATA_LOGS	8
+
+/*
+ * For superblock
+ */
+struct f2fs_super_block {
+	__le32 magic;			/* Magic Number */
+	__le16 major_ver;		/* Major Version */
+	__le16 minor_ver;		/* Minor Version */
+	__le32 log_sectorsize;		/* log2 sector size in bytes */
+	__le32 log_sectors_per_block;	/* log2 # of sectors per block */
+	__le32 log_blocksize;		/* log2 block size in bytes */
+	__le32 log_blocks_per_seg;	/* log2 # of blocks per segment */
+	__le32 segs_per_sec;		/* # of segments per section */
+	__le32 secs_per_zone;		/* # of sections per zone */
+	__le32 checksum_offset;		/* checksum offset inside super block */
+	__le64 block_count;		/* total # of user blocks */
+	__le32 section_count;		/* total # of sections */
+	__le32 segment_count;		/* total # of segments */
+	__le32 segment_count_ckpt;	/* # of segments for checkpoint */
+	__le32 segment_count_sit;	/* # of segments for SIT */
+	__le32 segment_count_nat;	/* # of segments for NAT */
+	__le32 segment_count_ssa;	/* # of segments for SSA */
+	__le32 segment_count_main;	/* # of segments for main area */
+	__le32 segment0_blkaddr;	/* start block address of segment 0 */
+	__le32 cp_blkaddr;		/* start block address of checkpoint */
+	__le32 sit_blkaddr;		/* start block address of SIT */
+	__le32 nat_blkaddr;		/* start block address of NAT */
+	__le32 ssa_blkaddr;		/* start block address of SSA */
+	__le32 main_blkaddr;		/* start block address of main area */
+	__le32 root_ino;		/* root inode number */
+	__le32 node_ino;		/* node inode number */
+	__le32 meta_ino;		/* meta inode number */
+	__u8 uuid[16];			/* 128-bit uuid for volume */
+	__le16 volume_name[512];	/* volume name */
+	__le32 extension_count;		/* # of extensions below */
+	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+} __packed;
+
+/*
+ * For checkpoint
+ */
+#define CP_ERROR_FLAG		0x00000008
+#define CP_COMPACT_SUM_FLAG	0x00000004
+#define CP_ORPHAN_PRESENT_FLAG	0x00000002
+#define CP_UMOUNT_FLAG		0x00000001
+
+struct f2fs_checkpoint {
+	__le64 checkpoint_ver;		/* checkpoint block version number */
+	__le64 user_block_count;	/* # of user blocks */
+	__le64 valid_block_count;	/* # of valid blocks in main area */
+	__le32 rsvd_segment_count;	/* # of reserved segments for gc */
+	__le32 overprov_segment_count;	/* # of overprovision segments */
+	__le32 free_segment_count;	/* # of free segments in main area */
+
+	/* information of current node segments */
+	__le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
+	__le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
+	/* information of current data segments */
+	__le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
+	__le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
+	__le32 ckpt_flags;		/* Flags : umount and journal_present */
+	__le32 cp_pack_total_block_count;	/* total # of one cp pack */
+	__le32 cp_pack_start_sum;	/* start block number of data summary */
+	__le32 valid_node_count;	/* Total number of valid nodes */
+	__le32 valid_inode_count;	/* Total number of valid inodes */
+	__le32 next_free_nid;		/* Next free node number */
+	__le32 sit_ver_bitmap_bytesize;	/* Default value 64 */
+	__le32 nat_ver_bitmap_bytesize; /* Default value 256 */
+	__le32 checksum_offset;		/* checksum offset inside cp block */
+	__le64 elapsed_time;		/* mounted time */
+	/* allocation type of current segment */
+	unsigned char alloc_type[MAX_ACTIVE_LOGS];
+
+	/* SIT and NAT version bitmap */
+	unsigned char sit_nat_version_bitmap[1];
+} __packed;
+
+/*
+ * For orphan inode management
+ */
+#define F2FS_ORPHANS_PER_BLOCK	1020
+
+struct f2fs_orphan_block {
+	__le32 ino[F2FS_ORPHANS_PER_BLOCK];	/* inode numbers */
+	__le32 reserved;	/* reserved */
+	__le16 blk_addr;	/* block index in current CP */
+	__le16 blk_count;	/* Number of orphan inode blocks in CP */
+	__le32 entry_count;	/* Total number of orphan nodes in current CP */
+	__le32 check_sum;	/* CRC32 for orphan inode block */
+} __packed;
+
+/*
+ * For NODE structure
+ */
+struct f2fs_extent {
+	__le32 fofs;		/* start file offset of the extent */
+	__le32 blk_addr;	/* start block address of the extent */
+	__le32 len;		/* lengh of the extent */
+} __packed;
+
+#define F2FS_MAX_NAME_LEN	256
+#define ADDRS_PER_INODE         923	/* Address Pointers in an Inode */
+#define ADDRS_PER_BLOCK         1018	/* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK          1018	/* Node IDs in an Indirect Block */
+
+struct f2fs_inode {
+	__le16 i_mode;			/* file mode */
+	__u8 i_advise;			/* file hints */
+	__u8 i_reserved;		/* reserved */
+	__le32 i_uid;			/* user ID */
+	__le32 i_gid;			/* group ID */
+	__le32 i_links;			/* links count */
+	__le64 i_size;			/* file size in bytes */
+	__le64 i_blocks;		/* file size in blocks */
+	__le64 i_atime;			/* access time */
+	__le64 i_ctime;			/* change time */
+	__le64 i_mtime;			/* modification time */
+	__le32 i_atime_nsec;		/* access time in nano scale */
+	__le32 i_ctime_nsec;		/* change time in nano scale */
+	__le32 i_mtime_nsec;		/* modification time in nano scale */
+	__le32 i_generation;		/* file version (for NFS) */
+	__le32 i_current_depth;		/* only for directory depth */
+	__le32 i_xattr_nid;		/* nid to save xattr */
+	__le32 i_flags;			/* file attributes */
+	__le32 i_pino;			/* parent inode number */
+	__le32 i_namelen;		/* file name length */
+	__u8 i_name[F2FS_MAX_NAME_LEN];	/* file name for SPOR */
+
+	struct f2fs_extent i_ext;	/* caching a largest extent */
+
+	__le32 i_addr[ADDRS_PER_INODE];	/* Pointers to data blocks */
+
+	__le32 i_nid[5];		/* direct(2), indirect(2),
+						double_indirect(1) node id */
+} __packed;
+
+struct direct_node {
+	__le32 addr[ADDRS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+struct indirect_node {
+	__le32 nid[NIDS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+enum {
+	COLD_BIT_SHIFT = 0,
+	FSYNC_BIT_SHIFT,
+	DENT_BIT_SHIFT,
+	OFFSET_BIT_SHIFT
+};
+
+struct node_footer {
+	__le32 nid;		/* node id */
+	__le32 ino;		/* inode nunmber */
+	__le32 flag;		/* include cold/fsync/dentry marks and offset */
+	__le64 cp_ver;		/* checkpoint version */
+	__le32 next_blkaddr;	/* next node page block address */
+} __packed;
+
+struct f2fs_node {
+	/* can be one of three types: inode, direct, and indirect types */
+	union {
+		struct f2fs_inode i;
+		struct direct_node dn;
+		struct indirect_node in;
+	};
+	struct node_footer footer;
+} __packed;
+
+/*
+ * For NAT entries
+ */
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+
+struct f2fs_nat_entry {
+	__u8 version;		/* latest version of cached nat entry */
+	__le32 ino;		/* inode number */
+	__le32 block_addr;	/* block address */
+} __packed;
+
+struct f2fs_nat_block {
+	struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For SIT entries
+ *
+ * Each segment is 2MB in size by default so that a bitmap for validity of
+ * there-in blocks should occupy 64 bytes, 512 bits.
+ * Not allow to change this.
+ */
+#define SIT_VBLOCK_MAP_SIZE 64
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+
+/*
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information.
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE
+ * [9:0] : valid block count
+ */
+#define SIT_VBLOCKS_SHIFT	10
+#define SIT_VBLOCKS_MASK	((1 << SIT_VBLOCKS_SHIFT) - 1)
+#define GET_SIT_VBLOCKS(raw_sit)				\
+	(le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
+#define GET_SIT_TYPE(raw_sit)					\
+	((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK)	\
+	 >> SIT_VBLOCKS_SHIFT)
+
+struct f2fs_sit_entry {
+	__le16 vblocks;				/* reference above */
+	__u8 valid_map[SIT_VBLOCK_MAP_SIZE];	/* bitmap for valid blocks */
+	__le64 mtime;				/* segment age for cleaning */
+} __packed;
+
+struct f2fs_sit_block {
+	struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For segment summary
+ *
+ * One summary block contains exactly 512 summary entries, which represents
+ * exactly 2MB segment by default. Not allow to change the basic units.
+ *
+ * NOTE: For initializing fields, you must use set_summary
+ *
+ * - If data page, nid represents dnode's nid
+ * - If node page, nid represents the node page's nid.
+ *
+ * The ofs_in_node is used by only data page. It represents offset
+ * from node's page's beginning to get a data block address.
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
+ */
+#define ENTRIES_IN_SUM		512
+#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
+#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
+
+/* a summary entry for a 4KB-sized block in a segment */
+struct f2fs_summary {
+	__le32 nid;		/* parent node id */
+	union {
+		__u8 reserved[3];
+		struct {
+			__u8 version;		/* node version number */
+			__le16 ofs_in_node;	/* block index in parent node */
+		} __packed;
+	};
+} __packed;
+
+/* summary block type, node or data, is stored to the summary_footer */
+#define SUM_TYPE_NODE		(1)
+#define SUM_TYPE_DATA		(0)
+
+struct summary_footer {
+	unsigned char entry_type;	/* SUM_TYPE_XXX */
+	__u32 check_sum;		/* summary checksum */
+} __packed;
+
+#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+				SUM_ENTRY_SIZE)
+#define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct nat_journal_entry))
+#define NAT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct nat_journal_entry))
+#define SIT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct sit_journal_entry))
+#define SIT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct sit_journal_entry))
+/*
+ * frequently updated NAT/SIT entries can be stored in the spare area in
+ * summary blocks
+ */
+enum {
+	NAT_JOURNAL = 0,
+	SIT_JOURNAL
+};
+
+struct nat_journal_entry {
+	__le32 nid;
+	struct f2fs_nat_entry ne;
+} __packed;
+
+struct nat_journal {
+	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
+	__u8 reserved[NAT_JOURNAL_RESERVED];
+} __packed;
+
+struct sit_journal_entry {
+	__le32 segno;
+	struct f2fs_sit_entry se;
+} __packed;
+
+struct sit_journal {
+	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
+	__u8 reserved[SIT_JOURNAL_RESERVED];
+} __packed;
+
+/* 4KB-sized summary block structure */
+struct f2fs_summary_block {
+	struct f2fs_summary entries[ENTRIES_IN_SUM];
+	union {
+		__le16 n_nats;
+		__le16 n_sits;
+	};
+	/* spare area is used by NAT or SIT journals */
+	union {
+		struct nat_journal nat_j;
+		struct sit_journal sit_j;
+	};
+	struct summary_footer footer;
+} __packed;
+
+/*
+ * For directory operations
+ */
+#define F2FS_DOT_HASH		0
+#define F2FS_DDOT_HASH		F2FS_DOT_HASH
+#define F2FS_MAX_HASH		(~((0x3ULL) << 62))
+#define F2FS_HASH_COL_BIT	((0x1ULL) << 63)
+
+typedef __le32	f2fs_hash_t;
+
+/* One directory entry slot covers 8bytes-long file name */
+#define F2FS_NAME_LEN		8
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK	214
+
+/* MAX level for dir lookup */
+#define MAX_DIR_HASH_DEPTH	63
+
+#define SIZE_OF_DIR_ENTRY	11	/* by byte */
+#define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
+					BITS_PER_BYTE)
+#define SIZE_OF_RESERVED	(PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+				F2FS_NAME_LEN) * \
+				NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+
+/* One directory entry slot representing F2FS_NAME_LEN-sized file name */
+struct f2fs_dir_entry {
+	__le32 hash_code;	/* hash code of file name */
+	__le32 ino;		/* inode number */
+	__le16 name_len;	/* lengh of file name */
+	__u8 file_type;		/* file type */
+} __packed;
+
+/* 4KB-sized directory entry block */
+struct f2fs_dentry_block {
+	/* validity bitmap for directory entries in each block */
+	__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
+	__u8 reserved[SIZE_OF_RESERVED];
+	struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
+	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+	F2FS_FT_UNKNOWN,
+	F2FS_FT_REG_FILE,
+	F2FS_FT_DIR,
+	F2FS_FT_CHRDEV,
+	F2FS_FT_BLKDEV,
+	F2FS_FT_FIFO,
+	F2FS_FT_SOCK,
+	F2FS_FT_SYMLINK,
+	F2FS_FT_MAX
+};
+
+#endif  /* _LINUX_F2FS_FS_H */
-- 
cgit v1.2.3


From 25ca923b2a766b9c93b63777ead351137533a623 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Wed, 28 Nov 2012 16:12:41 +0900
Subject: f2fs: fix endian conversion bugs reported by sparse

This patch should resolve the bugs reported by the sparse tool.
Initial reports were written by "kbuild test robot" managed by fengguang.wu.

In my local machines, I've tested also by running:
> make C=2 CF="-D__CHECK_ENDIAN__"

Accordingly, I've found lots of warnings and bugs related to the endian
conversion. And I've fixed all at this moment.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 include/linux/f2fs_fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1429ece7caab..c2fbbc35c1e6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -272,8 +272,8 @@ struct f2fs_sit_block {
  * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
  */
 #define ENTRIES_IN_SUM		512
-#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
-#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define	SUMMARY_SIZE		(7)	/* sizeof(struct summary) */
+#define	SUM_FOOTER_SIZE		(5)	/* sizeof(struct summary_footer) */
 #define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
 
 /* a summary entry for a 4KB-sized block in a segment */
@@ -297,7 +297,7 @@ struct summary_footer {
 	__u32 check_sum;		/* summary checksum */
 } __packed;
 
-#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+#define SUM_JOURNAL_SIZE	(F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
 				SUM_ENTRY_SIZE)
 #define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
 				sizeof(struct nat_journal_entry))
-- 
cgit v1.2.3


From 457d08ee4fd91c8df17917ff2d32565e6adacbfc Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sat, 8 Dec 2012 14:54:50 +0900
Subject: f2fs: introduce accessor to retrieve number of dentry slots

Simplify code by providing the accessor macro to retrieve the
number of dentry slots for a given filename length.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com>
---
 include/linux/f2fs_fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c2fbbc35c1e6..f9a12f6243a5 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -363,6 +363,9 @@ typedef __le32	f2fs_hash_t;
 
 /* One directory entry slot covers 8bytes-long file name */
 #define F2FS_NAME_LEN		8
+#define F2FS_NAME_LEN_BITS	3
+
+#define GET_DENTRY_SLOTS(x)	((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS)
 
 /* the number of dentry in a block */
 #define NR_DENTRY_IN_BLOCK	214
-- 
cgit v1.2.3


From 3c58346525d82625e68e24f071804c2dc057b6f4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Wed, 28 Nov 2012 16:23:01 +0000
Subject: slab: Simplify bootstrap

The nodelists field in kmem_cache is pointing to the first unused
object in the array field when bootstrap is complete.

A problem with the current approach is that the statically sized
kmem_cache structure use on boot can only contain NR_CPUS entries.
If the number of nodes plus the number of cpus is greater then we
would overwrite memory following the kmem_cache_boot definition.

Increase the size of the array field to ensure that also the node
pointers fit into the array field.

Once we do that we no longer need the kmem_cache_nodelists
array and we can then also use that structure elsewhere.

Acked-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab_def.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cc290f0bdb34..45c0356fdc8c 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -89,9 +89,13 @@ struct kmem_cache {
 	 * (see kmem_cache_init())
 	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
 	 * is statically defined, so we reserve the max number of cpus.
+	 *
+	 * We also need to guarantee that the list is able to accomodate a
+	 * pointer for each node since "nodelists" uses the remainder of
+	 * available pointers.
 	 */
 	struct kmem_list3 **nodelists;
-	struct array_cache *array[NR_CPUS];
+	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
 	/*
 	 * Do not add fields after array[]
 	 */
-- 
cgit v1.2.3


From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:45 +0200
Subject: inotify, fanotify: replace fsnotify_put_group() with
 fsnotify_destroy_group()

Currently in fsnotify_put_group() the ref count of a group is decremented and if
it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only
at group creation set to 1 and never increased after that a call to fsnotify_put_group()
always results in a call to fsnotify_destroy_group().
With this patch fsnotify_destroy_group() is called directly.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 63d966d5c2ea..d2ad345bdeec 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
-
+/* destroy group */
+extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3


From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:46 +0200
Subject: fsnotify: introduce fsnotify_get_group()

Introduce fsnotify_get_group() which increments the reference counter of a group.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d2ad345bdeec..e76cef75295d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
-/* get a reference to an existing or create a new group */
+/* create a new group */
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
+/* get reference to a group */
+extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
-- 
cgit v1.2.3


From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:50 +0200
Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark
 list

Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead
of a spinlock results in more flexibility (i.e it allows to sleep while the
lock is held).

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e76cef75295d..c5848346840d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -141,7 +141,7 @@ struct fsnotify_group {
 	unsigned int priority;
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
-	spinlock_t mark_lock;		/* protect marks_list */
+	struct mutex mark_mutex;	/* protect marks_list */
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
-- 
cgit v1.2.3


From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:51 +0200
Subject: fsnotify: pass group to fsnotify_destroy_mark()

In fsnotify_destroy_mark() dont get the group from the passed mark anymore,
but pass the group itself as an additional parameter to the function.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index c5848346840d..140b4b8a100b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
-/* given a mark, flag it to be freed when all references are dropped */
-extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
+/* given a group and a mark, flag mark to be freed when all references are dropped */
+extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
-- 
cgit v1.2.3


From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:52 +0200
Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and
 fsnotify_remove_mark()

This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked()
which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but
assume that the caller has already taken the groups mark mutex.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 140b4b8a100b..26c06afa264e 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
+extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group,
+				    struct inode *inode, struct vfsmount *mnt, int allow_dups);
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 				  struct fsnotify_group *group);
+extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+					 struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
-- 
cgit v1.2.3


From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:53 +0200
Subject: fsnotify: dont put marks on temporary list when clearing marks by
 group

In clear_marks_by_group_flags() the mark list of a group is iterated and the
marks are put on a temporary list.
Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list
any more and are able to remove the marks while the mark list is iterated and
the mark list mutex is held.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 26c06afa264e..5a8899350456 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -287,7 +287,6 @@ struct fsnotify_mark {
 		struct fsnotify_inode_mark i;
 		struct fsnotify_vfsmount_mark m;
 	};
-	struct list_head free_g_list;	/* tmp list used when freeing this mark */
 	__u32 ignored_mask;		/* events types to ignore */
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
 #define FSNOTIFY_MARK_FLAG_VFSMOUNT		0x02
-- 
cgit v1.2.3


From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Fri, 12 Aug 2011 01:13:31 +0200
Subject: fsnotify: change locking order

On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote:
>
> I finally built and tested a v3.0 kernel with these patches (I know I'm
> SOOOOOO far behind).  Not what I hoped for:
>
> > [  150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds.  Have a nice day...
> > [  150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070
> > [  150.946012] IP: [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0
> > [  150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
> > [  150.946012] CPU 0
> > [  150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan]
> > [  150.946012]
> > [  150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM
> > [  150.946012] RIP: 0010:[<ffffffff810ffd58>]  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] RSP: 0018:ffff88002c2e5df8  EFLAGS: 00010282
> > [  150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438
> > [  150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240
> > [  150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000
> > [  150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428
> > [  150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610
> > [  150.946012] FS:  00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000
> > [  150.946012] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> > [  150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0
> > [  150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [  150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > [  150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760)
> > [  150.946012] Stack:
> > [  150.946012]  ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76
> > [  150.946012]  ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250
> > [  150.946012]  ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438
> > [  150.946012] Call Trace:
> > [  150.946012]  [<ffffffff81102f76>] shmem_evict_inode+0x76/0x130
> > [  150.946012]  [<ffffffff8115e9be>] evict+0x7e/0x170
> > [  150.946012]  [<ffffffff8115ee40>] iput_final+0xd0/0x190
> > [  150.946012]  [<ffffffff8115ef33>] iput+0x33/0x40
> > [  150.946012]  [<ffffffff81180205>] fsnotify_destroy_mark_locked+0x145/0x160
> > [  150.946012]  [<ffffffff81180316>] fsnotify_destroy_mark+0x36/0x50
> > [  150.946012]  [<ffffffff81181937>] sys_inotify_rm_watch+0x77/0xd0
> > [  150.946012]  [<ffffffff815aca52>] system_call_fastpath+0x16/0x1b
> > [  150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00
> > [  150.946012]  83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a
> > [  150.946012] RIP  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012]  RSP <ffff88002c2e5df8>
> > [  150.946012] CR2: 0000000000000070
>
> Looks at aweful lot like the problem from:
> http://www.spinics.net/lists/linux-fsdevel/msg46101.html
>

I tried to reproduce this bug with your test program, but without success.
However, if I understand correctly, this occurs since we dont hold any locks when
we call iput() in mark_destroy(), right?
With the patches you tested, iput() is also not called within any lock, since the
groups mark_mutex is released temporarily before iput() is called.  This is, since
the original codes behaviour is similar.
However since we now have a mutex as the biggest lock, we can do what you
suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and
call iput() with the mutex held to avoid the race.
The patch below implements this. It uses nested locking to avoid deadlock in case
we do the final iput() on an inode which still holds marks and thus would take
the mutex again when calling fsnotify_inode_delete() in destroy_inode().

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 5a8899350456..1af2f6a722c0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -88,9 +88,10 @@ struct fsnotify_event_private_data;
  *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
- * freeing-mark - this means that a mark has been flagged to die when everything
- *		finishes using it.  The function is supplied with what must be a
- *		valid group and inode to use to clean up.
+ * freeing_mark - called when a mark is being destroyed for some reason.  The group
+ * 		MUST be holding a reference on each mark and that reference must be
+ * 		dropped in this function.  inotify uses this function to send
+ * 		userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
 	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-- 
cgit v1.2.3


From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 14 Oct 2011 17:43:39 -0400
Subject: fsnotify: make fasync generic for both inotify and fanotify

inotify is supposed to support async signal notification when information
is available on the inotify fd.  This patch moves that support to generic
fsnotify functions so it can be used by all notification mechanisms.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify_backend.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1af2f6a722c0..d5b0910d4961 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,8 @@ struct fsnotify_group {
 					 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
+	struct fasync_struct    *fsn_fa;    /* async notification */
+
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
@@ -156,7 +158,6 @@ struct fsnotify_group {
 			spinlock_t	idr_lock;
 			struct idr      idr;
 			u32             last_wd;
-			struct fasync_struct    *fa;    /* async notification */
 			struct user_struct      *user;
 		} inotify_data;
 #endif
@@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group);
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
+/* fasync handler function */
+extern int fsnotify_fasync(int fd, struct file *file, int on);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3


From 7056741fd9fc14a65608549a4657cf5178f05f63 Mon Sep 17 00:00:00 2001
From: Jim Kukunas <james.t.kukunas@linux.intel.com>
Date: Thu, 8 Nov 2012 13:47:44 -0800
Subject: lib/raid6: Add AVX2 optimized recovery functions

Optimize RAID6 recovery functions to take advantage of
the 256-bit YMM integer instructions introduced in AVX2.

The patch was tested and benchmarked before submission.
However hardware is not yet released so benchmark numbers
cannot be reported.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/pq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 640c69ceec96..3156347452b9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -109,6 +109,7 @@ struct raid6_recov_calls {
 
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
+extern const struct raid6_recov_calls raid6_recov_avx2;
 
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
-- 
cgit v1.2.3


From 2c935842bdb46f5f557426feb4d2bdfdad1aa5f9 Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Fri, 30 Nov 2012 13:10:39 -0800
Subject: lib/raid6: Add AVX2 optimized gen_syndrome functions

Add AVX2 optimized gen_syndrom functions, which is simply based on
sse2.c written by hpa.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/pq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 3156347452b9..8dfaa2ce2e95 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1;
 extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
+extern const struct raid6_calls raid6_avx2x1;
+extern const struct raid6_calls raid6_avx2x2;
+extern const struct raid6_calls raid6_avx2x4;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
-- 
cgit v1.2.3


From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Wed, 28 Nov 2012 12:28:24 -0800
Subject: libceph: remove 'osdtimeout' option

This would reset a connection with any OSD that had an outstanding
request that was taking more than N seconds.  The idea was that if the
OSD was buggy, the client could compensate by resending the request.

In reality, this only served to hide server bugs, and we haven't
actually seen such a bug in quite a while.  Moreover, the userspace
client code never did this.

More importantly, often the request is taking a long time because the
OSD is trying to recover, or overloaded, and killing the connection
and retrying would only make the situation worse by giving the OSD
more work to do.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 include/linux/ceph/libceph.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06f..317aff8feb0a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,7 +43,6 @@ struct ceph_options {
 	struct ceph_entity_addr my_addr;
 	int mount_timeout;
 	int osd_idle_ttl;
-	int osd_timeout;
 	int osd_keepalive_timeout;
 
 	/*
@@ -63,7 +62,6 @@ struct ceph_options {
  * defaults
  */
 #define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
 #define CEPH_OSD_KEEPALIVE_DEFAULT  5
 #define CEPH_OSD_IDLE_TTL_DEFAULT    60
 
-- 
cgit v1.2.3


From d2cc4dde9206aa2c7fb237aa689d3277cc070547 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 29 Nov 2012 08:37:03 -0600
Subject: bdi_register: add __printf verification, fix arg mismatch

__printf is useful to verify format and arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 include/linux/backing-dev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..12731a19ef06 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -114,6 +114,7 @@ struct backing_dev_info {
 int bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
+__printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-- 
cgit v1.2.3


From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 16 Oct 2012 07:31:07 +1030
Subject: module: add syscall to load module from fd

As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.

Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.

If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on.  In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.

This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
---
 include/linux/syscalls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 727f0cd73921..32bc035bcd68 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs);
 #endif
-- 
cgit v1.2.3


From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 22 Oct 2012 18:09:41 +1030
Subject: module: add flags arg to sys_finit_module()

Thanks to Michael Kerrisk for keeping us honest.  These flags are actually
useful for eliminating the only case where kmod has to mangle a module's
internals: for overriding module versioning.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Acked-by: Kees Cook <keescook@chromium.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 32bc035bcd68..8cf7b508cb50 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
-asmlinkage long sys_finit_module(int fd, const char __user *uargs);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
 #endif
-- 
cgit v1.2.3


From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 16 Oct 2012 07:32:07 +1030
Subject: security: introduce kernel_module_from_file hook

Now that kernel module origins can be reasoned about, provide a hook to
the LSMs to make policy decisions about the module file. This will let
Chrome OS enforce that loadable kernel modules can only come from its
read-only hash-verified root filesystem. Other LSMs can, for example,
read extended attributes for signatures, etc.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Acked-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/security.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 05e88bdcf7d9..0f6afc657f77 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_module_from_file:
+ *	Load a kernel module from userspace.
+ *	@file contains the file structure pointing to the file containing
+ *	the kernel module to load. If the module is being loaded from a blob,
+ *	this argument will be NULL.
+ *	Return 0 if permission is granted.
  * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
@@ -1508,6 +1514,7 @@ struct security_operations {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_from_file)(struct file *file);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
@@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_module_from_file(struct file *file);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
@@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static inline int security_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static inline int security_task_fix_setuid(struct cred *new,
 					   const struct cred *old,
 					   int flags)
-- 
cgit v1.2.3


From fdf90729e57812cb12d7938e2dee7c71e875fb08 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 16 Oct 2012 12:40:08 +1030
Subject: ima: support new kernel module syscall

With the addition of the new kernel module syscall, which defines two
arguments - a file descriptor to the kernel module and a pointer to a NULL
terminated string of module arguments - it is now possible to measure and
appraise kernel modules like any other file on the file system.

This patch adds support to measure and appraise kernel modules in an
extensible and consistent manner.

To support filesystems without extended attribute support, additional
patches could pass the signature as the first parameter.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/ima.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 2c7223d7e73b..86c361e947b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,6 +18,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_module_check(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -40,6 +41,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
+static inline int ima_module_check(struct file *file)
+{
+	return 0;
+}
+
 #endif /* CONFIG_IMA_H */
 
 #ifdef CONFIG_IMA_APPRAISE
-- 
cgit v1.2.3


From 6f33d58794ef4cef4b2c706029810f9688bd3026 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: __UNIQUE_ID()

Jan Beulich points out __COUNTER__ (gcc 4.3 and above), so let's use
that to create unique ids.  This is better than __LINE__ which we use
today, so provide a wrapper.

Stanislaw Gruszka <sgruszka@redhat.com> reported that some module parameters
start with a digit, so we need to prepend when we for the unique id.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
 include/linux/compiler-gcc4.h | 2 ++
 include/linux/compiler.h      | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..56c802cba7f6 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -31,6 +31,8 @@
 
 #define __linktime_error(message) __attribute__((__error__(message)))
 
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
 #if __GNUC_MINOR__ >= 5
 /*
  * Mark a position in code as unreachable.  This can be used to
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e4162f41..5f45335e1ac7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -42,6 +42,10 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __rcu
 #endif
 
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
 #ifdef __KERNEL__
 
 #ifdef __GNUC__
@@ -164,6 +168,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+/* Not-quite-unique ID. */
+#ifndef __UNIQUE_ID
+# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From 34182eea36fc1d70d748b0947c873314980ba806 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: moduleparam: use __UNIQUE_ID()

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index d6a58065c09c..137b4198fc03 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -16,17 +16,15 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
-#define ___module_cat(a,b) __mod_ ## a ## b
-#define __module_cat(a,b) ___module_cat(a,b)
 #ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
-static const char __module_cat(name,__LINE__)[]				  \
+static const char __UNIQUE_ID(name)[]					  \
   __used __attribute__((section(".modinfo"), unused, aligned(1)))	  \
   = __stringify(tag) "=" info
 #else  /* !MODULE */
 /* This struct is here for syntactic coherency, it is not used */
 #define __MODULE_INFO(tag, name, info)					  \
-  struct __module_cat(name,__LINE__) {}
+  struct __UNIQUE_ID(name) {}
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
-- 
cgit v1.2.3


From facc0a6bd494ce21e31b34fc355ecf702518272b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Dec 2012 11:55:28 +1030
Subject: ASN.1: Define indefinite length marker constant

Define a constant to hold the marker value seen in an indefinite-length
element.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/asn1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/asn1.h b/include/linux/asn1.h
index 5c3f4e4b9a23..eed6982860ba 100644
--- a/include/linux/asn1.h
+++ b/include/linux/asn1.h
@@ -64,4 +64,6 @@ enum asn1_tag {
 	ASN1_LONG_TAG	= 31	/* Long form tag */
 };
 
+#define ASN1_INDEFINITE_LENGTH 0x80
+
 #endif /* _LINUX_ASN1_H */
-- 
cgit v1.2.3


From 63b68901dfd590cc13d4fe5c08dec2ca75b3c4aa Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: mfd: omap-usb-host: get rid of cpu_is_omap..() macros

Instead of using cpu_is_omap..() macros in the device driver we
rely on information provided in the platform data.

The only information we need is whether the USB Host module has
a single ULPI bypass control bit for all ports or individual bypass
control bits for each port. OMAP3 REV2.1 and earlier have the former.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
[tony@atomide.com: updated to remove plat/cpu.h]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/usb-omap.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index 8570bcfe6311..ef65b67c56c3 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -59,6 +59,9 @@ struct usbhs_omap_platform_data {
 
 	struct ehci_hcd_omap_platform_data	*ehci_data;
 	struct ohci_hcd_omap_platform_data	*ohci_data;
+
+	/* OMAP3 <= ES2.1 have a single ulpi bypass control bit */
+	unsigned				single_ulpi_bypass:1;
 };
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From d9ba573718666df2e7e30d671f81bba39d07f91c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: ARM: OMAP: Move plat/omap-serial.h to
 include/linux/platform_data/serial-omap.h

We need to move this file to allow ARM multiplatform configurations
to build for omap2+. This can now be done as this file now only
contains platform_data.

cc: Russell King <linux@arm.linux.org.uk>
cc: Alan Cox <alan@linux.intel.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Govindraj.R <govindraj.raja@ti.com>
cc: Kevin Hilman <khilman@ti.com>
cc: linux-serial@vger.kernel.org
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/serial-omap.h | 51 +++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 include/linux/platform_data/serial-omap.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h
new file mode 100644
index 000000000000..ff9b0aab5281
--- /dev/null
+++ b/include/linux/platform_data/serial-omap.h
@@ -0,0 +1,51 @@
+/*
+ * Driver for OMAP-UART controller.
+ * Based on drivers/serial/8250.c
+ *
+ * Copyright (C) 2010 Texas Instruments.
+ *
+ * Authors:
+ *	Govindraj R	<govindraj.raja@ti.com>
+ *	Thara Gopinath	<thara@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __OMAP_SERIAL_H__
+#define __OMAP_SERIAL_H__
+
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/pm_qos.h>
+
+#define DRIVER_NAME	"omap_uart"
+
+/*
+ * Use tty device name as ttyO, [O -> OMAP]
+ * in bootargs we specify as console=ttyO0 if uart1
+ * is used as console uart.
+ */
+#define OMAP_SERIAL_NAME	"ttyO"
+
+struct omap_uart_port_info {
+	bool			dma_enabled;	/* To specify DMA Mode */
+	unsigned int		uartclk;	/* UART clock rate */
+	upf_t			flags;		/* UPF_* flags */
+	unsigned int		dma_rx_buf_size;
+	unsigned int		dma_rx_timeout;
+	unsigned int		autosuspend_timeout;
+	unsigned int		dma_rx_poll_rate;
+	int			DTR_gpio;
+	int			DTR_inverted;
+	int			DTR_present;
+
+	int (*get_context_loss_count)(struct device *);
+	void (*set_forceidle)(struct device *);
+	void (*set_noidle)(struct device *);
+	void (*enable_wakeup)(struct device *, bool);
+};
+
+#endif /* __OMAP_SERIAL_H__ */
-- 
cgit v1.2.3


From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 15 Dec 2012 15:21:52 -0500
Subject: NFSv4.1: Move the RPC timestamp out of the slot.

Shave a few bytes off the slot table size by moving the RPC timestamp
into the sequence results.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a55abd499c21..29adb12c7ecf 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,6 +194,7 @@ struct nfs4_sequence_args {
 
 struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
+	unsigned long		sr_timestamp;
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
 	u32			sr_highest_slotid;
-- 
cgit v1.2.3


From afc59400d6c65bad66d4ad0b2daf879cbff8e23e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 10 Dec 2012 18:01:37 -0500
Subject: nfsd4: cleanup: replace rq_resused count by rq_next_page pointer

It may be a matter of personal taste, but I find this makes the code
clearer.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d83db800fe02..676ddf53b3ee 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -243,6 +243,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	int			rq_resused;	/* number of pages used for result */
+	struct page *		*rq_next_page; /* next reply page to use */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
 
 static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 {
-	while (rqstp->rq_resused) {
-		struct page **pp = (rqstp->rq_respages +
-				    --rqstp->rq_resused);
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
 		if (*pp) {
 			put_page(*pp);
 			*pp = NULL;
-- 
cgit v1.2.3


From 06ca287dbac9cc19d04ac2901b8c4882c03795ff Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 16 Oct 2012 23:56:14 +1030
Subject: virtio: move queue_index and num_free fields into core struct
 virtqueue.

They're generic concepts, so hoist them.  This also avoids accessor
functions (though kept around for merge with DaveM's net tree).

This goes even further than Jason Wang's 17bb6d4088 patch
("virtio-ring: move queue_index to vring_virtqueue") which moved the
queue_index from the specific transport.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 533b1157f22e..4b8f17d90458 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -16,12 +16,20 @@
  * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @priv: a pointer for the virtqueue implementation to use.
+ * @index: the zero-based ordinal number for this queue.
+ * @num_free: number of elements we expect to be able to fit.
+ *
+ * A note on @num_free: with indirect buffers, each buffer needs one
+ * element in the queue, otherwise a buffer will need one element per
+ * sg element.
  */
 struct virtqueue {
 	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
 	const char *name;
 	struct virtio_device *vdev;
+	unsigned int index;
+	unsigned int num_free;
 	void *priv;
 };
 
@@ -50,7 +58,11 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
-int virtqueue_get_queue_index(struct virtqueue *vq);
+/* FIXME: Obsolete accessor, but required for virtio_net merge. */
+static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq)
+{
+	return vq->index;
+}
 
 /**
  * virtio_device - representation of a device using virtio
-- 
cgit v1.2.3


From 9bffdca8c64a72ac54c47a552734ab457bc720d4 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Tue, 11 Dec 2012 11:04:50 +1030
Subject: virtio: use dev_to_virtio wrapper in virtio

Use dev_to_virtio wrapper in virtio to make code clearly.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4b8f17d90458..5f1f80c76468 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -85,7 +85,11 @@ struct virtio_device {
 	void *priv;
 };
 
-#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
+static inline struct virtio_device *dev_to_virtio(struct device *_dev)
+{
+	return container_of(_dev, struct virtio_device, dev);
+}
+
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
-- 
cgit v1.2.3


From 9a2bdcc85d28506d4e5d4a9618fb133a3f40945d Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Mon, 10 Dec 2012 16:38:33 +0800
Subject: virtio: add drv_to_virtio to make code clearly

Add drv_to_virtio wrapper to get virtio_driver from device_driver.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 5f1f80c76468..cf8adb1f5b2c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -119,6 +119,11 @@ struct virtio_driver {
 #endif
 };
 
+static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
+{
+	return container_of(drv, struct virtio_driver, driver);
+}
+
 int register_virtio_driver(struct virtio_driver *drv);
 void unregister_virtio_driver(struct virtio_driver *drv);
 #endif /* _LINUX_VIRTIO_H */
-- 
cgit v1.2.3


From 7a64bf05b2a6fe3703062d13d389e3eb904741c6 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:21:51 -0800
Subject: mm: add a __GFP_KMEMCG flag

This flag is used to indicate to the callees that this allocation is a
kernel allocation in process context, and should be accounted to current's
memcg.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f74856e17e48..643c9a6f7f34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
-- 
cgit v1.2.3


From 7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:21:56 -0800
Subject: memcg: kmem controller infrastructure

Introduce infrastructure for tracking kernel memory pages to a given
memcg.  This will happen whenever the caller includes the flag
__GFP_KMEMCG flag, and the task belong to a memcg other than the root.

In memcontrol.h those functions are wrapped in inline acessors.  The idea
is to later on, patch those with static branches, so we don't incur any
overhead when no mem cgroups with limited kmem are being used.

Users of this functionality shall interact with the memcg core code
through the following functions:

memcg_kmem_newpage_charge: will return true if the group can handle the
                           allocation. At this point, struct page is not
                           yet allocated.

memcg_kmem_commit_charge: will either revert the charge, if struct page
                          allocation failed, or embed memcg information
                          into page_cgroup.

memcg_kmem_uncharge_page: called at free time, will revert the charge.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 110 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..afa2ad40457e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_kmem_enabled(void)
+{
+	return true;
+}
+
+/*
+ * In general, we'll do everything in our power to not incur in any overhead
+ * for non-memcg users for the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly.  If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				       struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * returns true if the memcg where the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the test is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * there is no need to specify memcg here, since it is embedded in page_cgroup
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. if @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+#else
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
-- 
cgit v1.2.3


From 6a1a0d3b625a4091e7a0eb249aefc6a644385149 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:00 -0800
Subject: mm: allocate kernel pages to the right memcg

When a process tries to allocate a page with the __GFP_KMEMCG flag, the
page allocator will call the corresponding memcg functions to validate
the allocation.  Tasks in the root memcg can always proceed.

To avoid adding markers to the page - and a kmem flag that would
necessarily follow, as much as doing page_cgroup lookups for no reason,
whoever is marking its allocations with __GFP_KMEMCG flag is responsible
for telling the page allocator that this is such an allocation at
free_pages() time.  This is done by the invocation of
__free_accounted_pages() and free_accounted_pages().

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 643c9a6f7f34..0f615eb23d05 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -367,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
+extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
+extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
-- 
cgit v1.2.3


From 50bdd430c20566b13d8bc59946184b08f5875de6 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:04 -0800
Subject: res_counter: return amount of charges after res_counter_uncharge()

It is useful to know how many charges are still left after a call to
res_counter_uncharge.  While it is possible to issue a res_counter_read
after uncharge, this can be racy.

If we need, for instance, to take some action when the counters drop down
to 0, only one of the callers should see it.  This is the same semantics
as the atomic variables in the kernel.

Since the current return value is void, we don't need to worry about
anything breaking due to this change: nobody relied on that, and only
users appearing from now on will be checking this value.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6f54e40fa218..5ae8456d9670 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
  *
  * these calls check for usage underflow and show a warning on the console
  * _locked call expects the counter->lock to be taken
+ *
+ * returns the total charges still present in @counter.
  */
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val);
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
-- 
cgit v1.2.3


From a8964b9b84f99c0b1b5d7c09520f89f0700e742e Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:09 -0800
Subject: memcg: use static branches when code not in use

We can use static branches to patch the code in or out when not used.

Because the _ACTIVE bit on kmem_accounted is only set after the increment
is done, we guarantee that the root memcg will always be selected for kmem
charges until all call sites are patched (see memcg_kmem_enabled).  This
guarantees that no mischarges are applied.

Static branch decrement happens when the last reference count from the
kmem accounting in memcg dies.  This will only happen when the charges
drop down to 0.

When that happens, we need to disable the static branch only on those
memcgs that enabled it.  To achieve this, we would be forced to complicate
the code by keeping track of which memcgs were the ones that actually
enabled limits, and which ones got it from its parents.

It is a lot simpler just to do static_key_slow_inc() on every child
that is accounted.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index afa2ad40457e..87d61e840ddd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,7 @@
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
 #include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -417,9 +418,10 @@ static inline void sock_release_memcg(struct sock *sk)
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_MEMCG_KMEM
+extern struct static_key memcg_kmem_enabled_key;
 static inline bool memcg_kmem_enabled(void)
 {
-	return true;
+	return static_key_false(&memcg_kmem_enabled_key);
 }
 
 /*
-- 
cgit v1.2.3


From 2ad306b17c0ac5a1b1f250d5f772aeb87fdf1eba Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:18 -0800
Subject: fork: protect architectures where THREAD_SIZE >= PAGE_SIZE against
 fork bombs

Because those architectures will draw their stacks directly from the page
allocator, rather than the slab cache, we can directly pass __GFP_KMEMCG
flag, and issue the corresponding free_pages.

This code path is taken when the architecture doesn't define
CONFIG_ARCH_THREAD_INFO_ALLOCATOR (only ia64 seems to), and has
THREAD_SIZE >= PAGE_SIZE.  Luckily, most - if not all - of the remaining
architectures fall in this category.

This will guarantee that every stack page is accounted to the memcg the
process currently lives on, and will have the allocations to fail if they
go over limit.

For the time being, I am defining a new variant of THREADINFO_GFP, not to
mess with the other path.  Once the slab is also tracked by memcg, we can
get rid of that flag.

Tested to successfully protect against :(){ :|:& };:

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Frederic Weisbecker <fweisbec@redhat.com>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ccc1899bd62e..e7e04736802f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
+#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
+
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
-- 
cgit v1.2.3


From ba6c496ed834a37a26fc6fc87fc9aecb0fa0014d Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:27 -0800
Subject: slab/slub: struct memcg_params

For the kmem slab controller, we need to record some extra information in
the kmem_cache structure.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 24 ++++++++++++++++++++++++
 include/linux/slab_def.h |  3 +++
 include/linux/slub_def.h |  3 +++
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 743a10415122..00efba149222 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -176,6 +176,30 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
 
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
+ * would otherwise be if that would be bundled in kmem_cache: we'll need an
+ * extra pointer chase. But the trade off clearly lays in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for its operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ */
+struct memcg_cache_params {
+	bool is_root_cache;
+	union {
+		struct kmem_cache *memcg_caches[0];
+		struct mem_cgroup *memcg;
+	};
+};
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 45c0356fdc8c..8bb6e0eaf3c6 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
 	 */
 	int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448adb7283..961e72eab907 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,9 @@ struct kmem_cache {
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 #ifdef CONFIG_NUMA
 	/*
-- 
cgit v1.2.3


From 2633d7a028239a738b793be5ca8fa6ac312f5793 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:34 -0800
Subject: slab/slub: consider a memcg parameter in kmem_create_cache

Allow a memcg parameter to be passed during cache creation.  When the slub
allocator is being used, it will only merge caches that belong to the same
memcg.  We'll do this by scanning the global list, and then translating
the cache to a memcg-specific cache

Default function is created as a wrapper, passing NULL to the memcg
version.  We only merge caches that belong to the same memcg.

A helper is provided, memcg_css_id: because slub needs a unique cache name
for sysfs.  Since this is visible, but not the canonical location for slab
data, the cache name is not used, the css_id should suffice.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 ++++++++++++++++++++++++++
 include/linux/slab.h       | 14 +++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 87d61e840ddd..0b69a0470007 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -28,6 +28,7 @@ struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
+struct kmem_cache;
 
 /* Stats that can be updated by kernel. */
 enum mem_cgroup_page_stat_item {
@@ -441,6 +442,11 @@ void __memcg_kmem_commit_charge(struct page *page,
 				       struct mem_cgroup *memcg, int order);
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
+int memcg_cache_id(struct mem_cgroup *memcg);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+void memcg_release_cache(struct kmem_cache *cachep);
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -525,6 +531,26 @@ static inline void
 memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 {
 }
+
+static inline int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return -1;
+}
+
+static inline int memcg_register_cache(struct mem_cgroup *memcg,
+				       struct kmem_cache *s)
+{
+	return 0;
+}
+
+static inline void memcg_release_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
+					struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 00efba149222..c0fcf28c15b2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,6 +116,7 @@ struct kmem_cache {
 };
 #endif
 
+struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
@@ -125,6 +126,9 @@ int slab_is_available(void);
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
+			unsigned long, void (*)(void *));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
@@ -191,15 +195,23 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * Child caches will hold extra metadata needed for its operation. Fields are:
  *
  * @memcg: pointer to the memcg this cache belongs to
+ * @list: list_head for the list of all caches in this memcg
+ * @root_cache: pointer to the global, root cache, this cache was derived from
  */
 struct memcg_cache_params {
 	bool is_root_cache;
 	union {
 		struct kmem_cache *memcg_caches[0];
-		struct mem_cgroup *memcg;
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head list;
+			struct kmem_cache *root_cache;
+		};
 	};
 };
 
+int memcg_update_all_caches(int num_memcgs);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
-- 
cgit v1.2.3


From 55007d849759252ddd573aeb36143b947202d509 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:38 -0800
Subject: memcg: allocate memory for memcg caches whenever a new memcg appears

Every cache that is considered a root cache (basically the "original"
caches, tied to the root memcg/no-memcg) will have an array that should be
large enough to store a cache pointer per each memcg in the system.

Theoreticaly, this is as high as 1 << sizeof(css_id), which is currently
in the 64k pointers range.  Most of the time, we won't be using that much.

What goes in this patch, is a simple scheme to dynamically allocate such
an array, in order to minimize memory usage for memcg caches.  Because we
would also like to avoid allocations all the time, at least for now, the
array will only grow.  It will tend to be big enough to hold the maximum
number of kmem-limited memcgs ever achieved.

We'll allocate it to be a minimum of 64 kmem-limited memcgs.  When we have
more than that, we'll start doubling the size of this array every time the
limit is reached.

Because we are only considering kmem limited memcgs, a natural point for
this to happen is when we write to the limit.  At that point, we already
have set_limit_mutex held, so that will become our natural synchronization
mechanism.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b69a0470007..45085e14e023 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -447,6 +447,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
-- 
cgit v1.2.3


From d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:40 -0800
Subject: memcg: infrastructure to match an allocation to the right cache

The page allocator is able to bind a page to a memcg when it is
allocated.  But for the caches, we'd like to have as many objects as
possible in a page belonging to the same cache.

This is done in this patch by calling memcg_kmem_get_cache in the
beginning of every allocation function.  This function is patched out by
static branches when kernel memory controller is not being used.

It assumes that the task allocating, which determines the memcg in the
page allocator, belongs to the same cgroup throughout the whole process.
Misaccounting can happen if the task calls memcg_kmem_get_cache() while
belonging to a cgroup, and later on changes.  This is considered
acceptable, and should only happen upon task migration.

Before the cache is created by the memcg core, there is also a possible
imbalance: the task belongs to a memcg, but the cache being allocated from
is the global cache, since the child cache is not yet guaranteed to be
ready.  This case is also fine, since in this case the GFP_KMEMCG will not
be passed and the page allocator will not attempt any cgroup accounting.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45085e14e023..bd9b5d73bc2b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -449,6 +449,10 @@ void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -518,6 +522,37 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 		__memcg_kmem_commit_charge(page, memcg, order);
 }
 
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process.  Misacounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later on changes. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
 #else
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
@@ -553,6 +588,12 @@ static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
 					struct kmem_cache *s)
 {
 }
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
-- 
cgit v1.2.3


From 0e9d92f2d02d8c8320f0502307c688d07bdac2b3 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:42 -0800
Subject: memcg: skip memcg kmem allocations in specified code regions

Create a mechanism that skip memcg allocations during certain pieces of
our core code.  It basically works in the same way as
preempt_disable()/preempt_enable(): By marking a region under which all
allocations will be accounted to the root memcg.

We need this to prevent races in early cache creation, when we
allocate data using caches that are not necessarily created already.

Signed-off-by: Glauber Costa <glommer@parallels.com>
yCc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9914c662ed7b..f712465b05c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
-- 
cgit v1.2.3


From b9ce5ef49f00daf2254c6953c8d31f79aabccd34 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:46 -0800
Subject: sl[au]b: always get the cache from its page in kmem_cache_free()

struct page already has this information.  If we start chaining caches,
this information will always be more trustworthy than whatever is passed
into the function.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bd9b5d73bc2b..2298122e71ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -554,6 +554,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+static inline bool memcg_kmem_enabled(void)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
-- 
cgit v1.2.3


From d79923fad95b0cdf7770e024677180c734cb7148 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:48 -0800
Subject: sl[au]b: allocate objects from memcg cache

We are able to match a cache allocation to a particular memcg.  If the
task doesn't change groups during the allocation itself - a rare event,
this will give us a good picture about who is the first group to touch a
cache page.

This patch uses the now available infrastructure by calling
memcg_kmem_get_cache() before all the cache allocations.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 961e72eab907..364ba6c9fe21 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -225,7 +225,10 @@ void *__kmalloc(size_t size, gfp_t flags);
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
-- 
cgit v1.2.3


From 1f458cbf122288b23620ee822e19bcbb76c8d6ec Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:50 -0800
Subject: memcg: destroy memcg caches

Implement destruction of memcg caches.  Right now, only caches where our
reference counter is the last remaining are deleted.  If there are any
other reference counters around, we just leave the caches lying around
until they go away.

When that happens, a destruction function is called from the cache code.
Caches are only destroyed in process context, so we queue them up for
later processing in the general case.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 ++
 include/linux/slab.h       | 10 +++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2298122e71ad..79fcf0cd7186 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -453,6 +453,8 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/include/linux/slab.h b/include/linux/slab.h
index c0fcf28c15b2..869efb8d2377 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -11,6 +11,8 @@
 
 #include <linux/gfp.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 /*
  * Flags to pass to kmem_cache_create().
@@ -179,7 +181,6 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #ifndef ARCH_SLAB_MINALIGN
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
-
 /*
  * This is the main placeholder for memcg-related information in kmem caches.
  * struct kmem_cache will hold a pointer to it, so the memory cost while
@@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belongs to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
  */
 struct memcg_cache_params {
 	bool is_root_cache;
@@ -206,6 +211,9 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
+			bool dead;
+			atomic_t nr_pages;
+			struct work_struct destroy;
 		};
 	};
 };
-- 
cgit v1.2.3


From 7cf2798240a2a2230cb16a391beef98d8a7ad362 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:55 -0800
Subject: memcg/sl[au]b: track all the memcg children of a kmem_cache

This enables us to remove all the children of a kmem_cache being
destroyed, if for example the kernel module it's being used in gets
unloaded.  Otherwise, the children will still point to the destroyed
parent.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 79fcf0cd7186..e119f3ef793c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -454,6 +454,7 @@ struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -601,6 +602,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	return cachep;
 }
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
-- 
cgit v1.2.3


From 749c54151a6e5b229e4ae067dbc651e54b161fbc Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:01 -0800
Subject: memcg: aggregate memcg cache values in slabinfo

When we create caches in memcgs, we need to display their usage
information somewhere.  We'll adopt a scheme similar to /proc/meminfo,
with aggregate totals shown in the global file, and per-group information
stored in the group itself.

For the time being, only reads are allowed in the per-group cache.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 8 ++++++++
 include/linux/slab.h       | 4 ++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e119f3ef793c..8dc7c746b44f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -420,6 +420,11 @@ static inline void sock_release_memcg(struct sock *sk)
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
+
+extern int memcg_limited_groups_array_size;
+#define for_each_memcg_cache_index(_idx)	\
+	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return static_key_false(&memcg_kmem_enabled_key);
@@ -557,6 +562,9 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+#define for_each_memcg_cache_index(_idx)	\
+	for (; NULL; )
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return false;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 869efb8d2377..b9278663f22a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -220,6 +220,10 @@ struct memcg_cache_params {
 
 int memcg_update_all_caches(int num_memcgs);
 
+struct seq_file;
+int cache_show(struct kmem_cache *s, struct seq_file *m);
+void print_slabinfo_header(struct seq_file *m);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
-- 
cgit v1.2.3


From 943a451a87d229ca564a27274b58eaeae35fde5d Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:03 -0800
Subject: slab: propagate tunable values

SLAB allows us to tune a particular cache behavior with tunables.  When
creating a new memcg cache copy, we'd like to preserve any tunables the
parent cache already had.

This could be done by an explicit call to do_tune_cpucache() after the
cache is created.  But this is not very convenient now that the caches are
created from common code, since this function is SLAB-specific.

Another method of doing that is taking advantage of the fact that
do_tune_cpucache() is always called from enable_cpucache(), which is
called at cache initialization.  We can just preset the values, and then
things work as expected.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

This change will require us to move the assignment of root_cache in
memcg_params a bit earlier.  We need this to be already set - which
memcg_kmem_register_cache will do - when we reach __kmem_cache_create()

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 8 +++++---
 include/linux/slab.h       | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8dc7c746b44f..ea02ff970836 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -448,7 +448,8 @@ void __memcg_kmem_commit_charge(struct page *page,
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
@@ -590,8 +591,9 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return -1;
 }
 
-static inline int memcg_register_cache(struct mem_cgroup *memcg,
-				       struct kmem_cache *s)
+static inline int
+memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+		     struct kmem_cache *root_cache)
 {
 	return 0;
 }
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b9278663f22a..5d168d7e0a28 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -130,7 +130,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *));
 struct kmem_cache *
 kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
-			unsigned long, void (*)(void *));
+			unsigned long, void (*)(void *), struct kmem_cache *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-- 
cgit v1.2.3


From 107dab5c92d5f9c3afe962036e47c207363255c7 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:05 -0800
Subject: slub: slub-specific propagation changes

SLUB allows us to tune a particular cache behavior with sysfs-based
tunables.  When creating a new memcg cache copy, we'd like to preserve any
tunables the parent cache already had.

This can be done by tapping into the store attribute function provided by
the allocator.  We of course don't need to mess with read-only fields.
Since the attributes can have multiple types and are stored internally by
sysfs, the best strategy is to issue a ->show() in the root cache, and
then ->store() in the memcg cache.

The drawback of that, is that sysfs can allocate up to a page in buffering
for show(), that we are likely not to need, but also can't guarantee.  To
avoid always allocating a page for that, we can update the caches at store
time with the maximum attribute size ever stored to the root cache.  We
will then get a buffer big enough to hold it.  The corolary to this, is
that if no stores happened, nothing will be propagated.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

[akpm@linux-foundation.org: tweak code to avoid __maybe_unused]
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 364ba6c9fe21..9db4825cd393 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,6 +103,7 @@ struct kmem_cache {
 #endif
 #ifdef CONFIG_MEMCG_KMEM
 	struct memcg_cache_params *memcg_params;
+	int max_attr_size; /* for propagation, maximum size of a stored attr */
 #endif
 
 #ifdef CONFIG_NUMA
-- 
cgit v1.2.3


From ebe945c27628fca03723582eba138acc2e2f3d15 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:10 -0800
Subject: memcg: add comments clarifying aspects of cache attribute propagation

This patch clarifies two aspects of cache attribute propagation.

First, the expected context for the for_each_memcg_cache macro in
memcontrol.h.  The usages already in the codebase are safe.  In mm/slub.c,
it is trivially safe because the lock is acquired right before the loop.
In mm/slab.c, it is less so: the lock is acquired by an outer function a
few steps back in the stack, so a VM_BUG_ON() is added to make sure it is
indeed safe.

A comment is also added to detail why we are returning the value of the
parent cache and ignoring the children's when we propagate the attributes.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ea02ff970836..0108a56f814e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -422,6 +422,12 @@ static inline void sock_release_memcg(struct sock *sk)
 extern struct static_key memcg_kmem_enabled_key;
 
 extern int memcg_limited_groups_array_size;
+
+/*
+ * Helper macro to loop through all memcg-specific caches. Callers must still
+ * check if the cache is valid (it is either valid or NULL).
+ * the slab_mutex must be held when looping through those caches
+ */
 #define for_each_memcg_cache_index(_idx)	\
 	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
 
-- 
cgit v1.2.3


From 7179e7bf4592ac5a7b30257a7df6259ee81e51da Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@huawei.com>
Date: Tue, 18 Dec 2012 14:23:19 -0800
Subject: mm/hugetlb: create hugetlb cgroup file in hugetlb_init

Build kernel with CONFIG_HUGETLBFS=y,CONFIG_HUGETLB_PAGE=y and
CONFIG_CGROUP_HUGETLB=y, then specify hugepagesz=xx boot option, system
will fail to boot.

This failure is caused by following code path:

  setup_hugepagesz
    hugetlb_add_hstate
      hugetlb_cgroup_file_init
        cgroup_add_cftypes
          kzalloc <--slab is *not available* yet

For this path, slab is not available yet, so memory allocated will be
failed, and cause WARN_ON() in hugetlb_cgroup_file_init().

So I move hugetlb_cgroup_file_init() into hugetlb_init().

[akpm@linux-foundation.org: tweak coding-style, remove pointless __init on inlined function]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index d73878c694b3..ce8217f7b5c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
 
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }
 
 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
-- 
cgit v1.2.3


From 59771079c18c44e39106f0f30054025acafadb41 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 19 Dec 2012 07:18:35 -0800
Subject: blk: avoid divide-by-zero with zero discard granularity

Commit 8dd2cb7e880d ("block: discard granularity might not be power of
2") changed a couple of 'binary and' operations into modulus operations.
Which turned the harmless case of a zero discard_granularity into a
possible divide-by-zero.

The code also had a much more subtle bug: it was doing the modulus of a
value in bytes using 'sector_t'.  That was always conceptually wrong,
but didn't actually matter back when the code assumed a power-of-two
granularity: we only looked at the low bits anyway.

But with potentially arbitrary sector numbers, using a 'sector_t' to
express bytes is very very wrong: depending on configuration it limits
the starting offset of the device to just 32 bits, and any overflow
would result in a wrong value if the modulus wasn't a power-of-two.

So re-write the code to not only protect against the divide-by-zero, but
to do the starting sector arithmetic in sectors, and using the proper
types.

[ For any mathematicians out there: it also looks monumentally stupid to
  do the 'modulo granularity' operation *twice*, never mind having a "+
  granularity" in the second modulus op.

  But that's the easiest way to avoid negative values or overflow, and
  it is how the original code was done. ]

Reported-by: Ingo Molnar <mingo@kernel.org>
Reported-by: Doug Anderson <dianders@chromium.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index acb4f7bbbd32..f94bc83011ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1188,14 +1188,25 @@ static inline int queue_discard_alignment(struct request_queue *q)
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	sector_t alignment = sector << 9;
-	alignment = sector_div(alignment, lim->discard_granularity);
+	unsigned int alignment, granularity, offset;
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	alignment = lim->discard_granularity + lim->discard_alignment - alignment;
-	return sector_div(alignment, lim->discard_granularity);
+	/* Why are these in bytes, not sectors? */
+	alignment = lim->discard_alignment >> 9;
+	granularity = lim->discard_granularity >> 9;
+	if (!granularity)
+		return 0;
+
+	/* Offset of the partition start in 'granularity' sectors */
+	offset = sector_div(sector, granularity);
+
+	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
+	offset = (granularity + alignment - offset) % granularity;
+
+	/* Turn it back into bytes, gaah */
+	return offset << 9;
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
-- 
cgit v1.2.3


From ab28698d33af05abab0bcf8021eafe38f7434f24 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jogo@openwrt.org>
Date: Wed, 19 Dec 2012 09:10:09 -0600
Subject: of: define struct device in of_platform.h if !OF_DEVICE and
 !OF_ADDRESS

Fixes the following warning:

include/linux/of_platform.h:106:13: warning: 'struct device' declared
inside parameter list [enabled by default]
include/linux/of_platform.h:106:13: warning: its scope is only this
definition or declaration, which is probably not what you want [enabled
by default]

Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Signed-off-by: Rob Herring <robherring2@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_platform.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index b47d2040c9f2..3863a4dbdf18 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -100,6 +100,7 @@ extern int of_platform_populate(struct device_node *root,
 
 #if !defined(CONFIG_OF_ADDRESS)
 struct of_dev_auxdata;
+struct device;
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
 					const struct of_dev_auxdata *lookup,
-- 
cgit v1.2.3


From 3c439b5586e9200f7e6287ee77c175c4d5b0eeed Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 6 Dec 2012 17:12:00 +0000
Subject: mlx4_core: Allow choosing flow steering mode

Device managed flow steering will be enabled only under administrator
directive provided through setting the existing module parameter
log_num_mgm_entry_size to -1 (if the device actually supports flow
steering).  If flow steering isn't requested or not available, the
driver will use the value of log_num_mgm_entry_size and B0 steering.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 21821da2abfd..20ea939c22a6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -625,6 +625,7 @@ struct mlx4_dev {
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			num_vfs;
+	int			oper_log_mgm_entry_size;
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 };
-- 
cgit v1.2.3


From a1c088e01b71d90852b0df5a77cdae46bd0e0c05 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 18 Dec 2012 04:45:29 +0000
Subject: usbnet: handle PM failure gracefully

If a device fails to do remote wakeup, this is no reason
to abort an open totally. This patch just continues without
runtime PM.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9bbeabf66c54..288b32aadab2 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -69,6 +69,7 @@ struct usbnet {
 #		define EVENT_DEV_ASLEEP 6
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
+#		define EVENT_NO_RUNTIME_PM	9
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3


From 2dd7c8cf29769f6b66f26b501db2364640c2c9d0 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 18 Dec 2012 04:45:52 +0000
Subject: usbnet: generic manage_power()

Centralise common code for manage_power() in usbnet
by making a generic simple implementation

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 288b32aadab2..bd45eb7bedc8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -241,4 +241,6 @@ extern void usbnet_set_msglevel(struct net_device *, u32);
 extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
+extern int usbnet_manage_power(struct usbnet *, int);
+
 #endif /* __LINUX_USB_USBNET_H */
-- 
cgit v1.2.3


From cf13a84d174947df4bb809edfb4887393642303e Mon Sep 17 00:00:00 2001
From: Fabio Porcedda <fabio.porcedda@gmail.com>
Date: Fri, 5 Oct 2012 12:16:09 +0200
Subject: watchdog: WatchDog Timer Driver Core: fix comment

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 87490ac4bd87..3a9df2f43be6 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -129,7 +129,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
 	return wdd->driver_data;
 }
 
-/* drivers/watchdog/core/watchdog_core.c */
+/* drivers/watchdog/watchdog_core.c */
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
-- 
cgit v1.2.3


From 468366138850f20543f1d4878028900672b23dae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Nov 2012 09:12:59 -0500
Subject: COMPAT_SYSCALL_DEFINE: infrastructure

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe63c48..a7877fa809fd 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -23,6 +23,48 @@
 #define COMPAT_USE_64BIT_TIME 0
 #endif
 
+#ifndef __SC_DELOUSE
+#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v))
+#endif
+
+#define __SC_CCAST1(t1, a1)      __SC_DELOUSE(t1,a1)
+#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__)
+#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__)
+#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__)
+#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__)
+#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE1(name, ...) \
+        COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE2(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE3(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE4(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE5(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE6(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
+
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__));	\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__));	\
+	asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__))	\
+	{								\
+		return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__));	\
+	}								\
+	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__))
+
+#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__))
+
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3


From ae903caae267154de7cf8576b130ff474630596b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 12:44:11 -0500
Subject: Bury the conditionals from kernel_thread/kernel_execve series

All architectures have
	CONFIG_GENERIC_KERNEL_THREAD
	CONFIG_GENERIC_KERNEL_EXECVE
	__ARCH_WANT_SYS_EXECVE
None of them have __ARCH_WANT_KERNEL_EXECVE and there are only two callers
of kernel_execve() (which is a trivial wrapper for do_execve() now) left.
Kill the conditionals and make both callers use do_execve().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/binfmts.h  | 4 ----
 include/linux/sched.h    | 2 --
 include/linux/syscalls.h | 9 ---------
 3 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b41a86..8c1388c6ae27 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -121,8 +121,4 @@ extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
-#endif
-
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1162258bcaf0..9e5a54e3d84f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,9 +2291,7 @@ extern int do_execve(const char *,
 		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-#endif
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7f364d..9fe5f946526e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -827,15 +827,6 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  const char  __user *pathname);
 asmlinkage long sys_syncfs(int fd);
 
-#ifndef CONFIG_GENERIC_KERNEL_EXECVE
-int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
-#else
-#define kernel_execve(filename, argv, envp) \
-	do_execve(filename, \
-		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp)
-#endif
-
 asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #ifdef CONFIG_CLONE_BACKWARDS
-- 
cgit v1.2.3


From 1ca97bb541a1f5a735e697a8bba763cde3aab452 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 18 Nov 2012 12:50:10 -0500
Subject: new helper: current_user_stack_pointer()

	Cross-architecture equivalent of rdusp(); default is
user_stack_pointer(current_pt_regs()) - that works for almost all
platforms that have usp saved in pt_regs.  The only exception from
that is ia64 - we want memory stack, not the backing store for
register one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/ptrace.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04bddd9..a3a9d085f932 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -342,6 +342,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define signal_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef current_user_stack_pointer
+#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
-- 
cgit v1.2.3


From 5c49574ffd7ac07eae8c3b065d19e6ebc7e4760f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 18 Nov 2012 15:29:16 -0500
Subject: new helper: restore_altstack()

to be used by rt_sigreturn instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index e19a011b43b7..5969522136fe 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -385,4 +385,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 
 void signals_init(void);
 
+int restore_altstack(const stack_t __user *);
+
 #endif /* _LINUX_SIGNAL_H */
-- 
cgit v1.2.3


From 9b064fc3f95a8e44e929fdf4d6037334ea03d15b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 13:49:35 -0500
Subject: new helper: compat_user_stack_pointer()

Compat counterpart of current_user_stack_pointer(); for most of the biarch
architectures those two are identical, but e.g. arm64 and arm use different
registers for stack pointer...

Note that amd64 variants of current_user_stack_pointer/compat_user_stack_pointer
do *not* rely on pt_regs having been through FIXUP_TOP_OF_STACK.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index a7877fa809fd..62bb76f91baf 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -65,6 +65,9 @@
 
 #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
+#ifndef compat_user_stack_pointer
+#define compat_user_stack_pointer() current_user_stack_pointer()
+#endif
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3


From 6bf9adfc90370b695cb111116e15fdc0e1906270 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 14:09:47 -0500
Subject: introduce generic sys_sigaltstack(), switch x86 and um to it

Conditional on CONFIG_GENERIC_SIGALTSTACK; architectures that do not
select it are completely unaffected

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/syscalls.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9fe5f946526e..6ca1e08210c6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -63,6 +63,7 @@ struct getcpu_cache;
 struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
+struct sigaltstack;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -299,6 +300,11 @@ asmlinkage long sys_personality(unsigned int personality);
 asmlinkage long sys_sigpending(old_sigset_t __user *set);
 asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
 				old_sigset_t __user *oset);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss,
+				struct sigaltstack __user *uoss);
+#endif
+
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
 asmlinkage long sys_setitimer(int which,
 				struct itimerval __user *value,
-- 
cgit v1.2.3


From 9026843952adac5b123c7b8dc961e5c15828d9e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 14:47:53 -0500
Subject: generic compat_sys_sigaltstack()

Again, conditional on CONFIG_GENERIC_SIGALTSTACK

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 62bb76f91baf..cb5637e2ee2c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -68,6 +68,16 @@
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+#ifndef compat_sigaltstack	/* we'll need that for MIPS */
+typedef struct compat_sigaltstack {
+	compat_uptr_t			ss_sp;
+	int				ss_flags;
+	compat_size_t			ss_size;
+} compat_stack_t;
+#endif
+#endif
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
@@ -632,6 +642,12 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr);
+
+int compat_restore_altstack(const compat_stack_t __user *uss);
+#endif
 
 #else
 
-- 
cgit v1.2.3


From c40702c49faef05ae324f121d8b3e215244ee152 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 20 Nov 2012 14:24:26 -0500
Subject: new helpers: __save_altstack/__compat_save_altstack, switch x86 and
 um to those

note that they are relying on access_ok() already checked by caller.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h | 1 +
 include/linux/signal.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index cb5637e2ee2c..334813307ec1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -647,6 +647,7 @@ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
 				       compat_stack_t __user *uoss_ptr);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
+int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 #endif
 
 #else
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5969522136fe..0a89ffc48466 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -386,5 +386,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 void signals_init(void);
 
 int restore_altstack(const stack_t __user *);
+int __save_altstack(stack_t __user *, unsigned long);
 
 #endif /* _LINUX_SIGNAL_H */
-- 
cgit v1.2.3


From ada65c74059f8c104f1b467c126205471634c435 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Wed, 12 Dec 2012 10:23:03 +0100
Subject: dma-buf: remove fallback for !CONFIG_DMA_SHARED_BUFFER

Documentation says that code requiring dma-buf should add it to
select, so inline fallbacks are not going to be used. A link error
will make it obvious what went wrong, instead of silently doing
nothing at runtime.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 include/linux/dma-buf.h | 99 -------------------------------------------------
 1 file changed, 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index eb48f3816df9..bd2e52ccc4f2 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -156,7 +156,6 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
 	get_file(dmabuf->file);
 }
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 							struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
@@ -184,103 +183,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
 void *dma_buf_vmap(struct dma_buf *);
 void dma_buf_vunmap(struct dma_buf *, void *vaddr);
-#else
-
-static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_detach(struct dma_buf *dmabuf,
-				  struct dma_buf_attachment *dmabuf_attach)
-{
-	return;
-}
-
-static inline struct dma_buf *dma_buf_export(void *priv,
-					     const struct dma_buf_ops *ops,
-					     size_t size, int flags)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags)
-{
-	return -ENODEV;
-}
-
-static inline struct dma_buf *dma_buf_get(int fd)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_put(struct dma_buf *dmabuf)
-{
-	return;
-}
-
-static inline struct sg_table *dma_buf_map_attachment(
-	struct dma_buf_attachment *attach, enum dma_data_direction write)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
-			struct sg_table *sg, enum dma_data_direction dir)
-{
-	return;
-}
-
-static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-					   size_t start, size_t len,
-					   enum dma_data_direction dir)
-{
-	return -ENODEV;
-}
-
-static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-					  size_t start, size_t len,
-					  enum dma_data_direction dir)
-{
-}
-
-static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf,
-					unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf,
-					 unsigned long pnum, void *vaddr)
-{
-}
-
-static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap(struct dma_buf *dmabuf,
-				  unsigned long pnum, void *vaddr)
-{
-}
-
-static inline int dma_buf_mmap(struct dma_buf *dmabuf,
-			       struct vm_area_struct *vma,
-			       unsigned long pgoff)
-{
-	return -ENODEV;
-}
-
-static inline void *dma_buf_vmap(struct dma_buf *dmabuf)
-{
-	return NULL;
-}
-
-static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
-{
-}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.2.3


From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 28 Nov 2012 11:30:53 -0500
Subject: vfs: remove DCACHE_NEED_LOOKUP

The code that relied on that flag was ripped out of btrfs quite some
time ago, and never added back. Josef indicated that he was going to
take a different approach to the problem in btrfs, and that we
could just eliminate this flag.

Cc: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59200795482e..c1754b59ddd3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -202,7 +202,6 @@ struct dentry_operations {
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
 #define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
-#define DCACHE_NEED_LOOKUP	0x80000 /* dentry requires i_op->lookup */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
@@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry)
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
-static inline bool d_need_lookup(struct dentry *dentry)
-{
-	return dentry->d_flags & DCACHE_NEED_LOOKUP;
-}
-
-extern void d_clear_need_lookup(struct dentry *dentry);
-
 extern int sysctl_vfs_cache_pressure;
 
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3


From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:32 +0000
Subject: CacheFiles: Fix the marking of cached pages

Under some circumstances CacheFiles defers the marking of pages with PG_fscache
so that it can take advantage of pagevecs to reduce the number of calls to
fscache_mark_pages_cached() and the netfs's hook to keep track of this.

There are, however, two problems with this:

 (1) It can lead to the PG_fscache mark being applied _after_ the page is set
     PG_uptodate and unlocked (by the call to fscache_end_io()).

 (2) CacheFiles's ref on the page is dropped immediately following
     fscache_end_io() - and so may not still be held when the mark is applied.
     This can lead to the page being passed back to the allocator before the
     mark is applied.

Fix this by, where appropriate, marking the page before calling
fscache_end_io() and releasing the page.  This means that we can't take
advantage of pagevecs and have to make a separate call for each page to the
marking routines.

The symptoms of this are Bad Page state errors cropping up under memory
pressure, for example:

BUG: Bad page state in process tar  pfn:002da
page:ffffea0000009fb0 count:0 mapcount:0 mapping:          (null) index:0x1447
page flags: 0x1000(private_2)
Pid: 4574, comm: tar Tainted: G        W   3.1.0-rc4-fsdevel+ #1064
Call Trace:
 [<ffffffff8109583c>] ? dump_page+0xb9/0xbe
 [<ffffffff81095916>] bad_page+0xd5/0xea
 [<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a
 [<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662
 [<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267
 [<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267
 [<ffffffff81098d7b>] ra_submit+0x1c/0x20
 [<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a
 [<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a
 [<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a
 [<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e
 [<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs]
 [<ffffffff810c22c4>] do_sync_read+0xba/0xfa
 [<ffffffff81177a47>] ? security_file_permission+0x7b/0x84
 [<ffffffff810c25dd>] ? rw_verify_area+0xab/0xc8
 [<ffffffff810c29a4>] vfs_read+0xaa/0x13a
 [<ffffffff810c2a79>] sys_read+0x45/0x6c
 [<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b

As can be seen, PG_private_2 (== PG_fscache) is set in the page flags.

Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was
set appropriately showed that sometimes it wasn't.  This led to the discovery
that sometimes the page has apparently been reclaimed by the time the marker
got to see it.

Reported-by: M. Stevens <m@tippett.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/fscache-cache.h |  3 +++
 include/linux/fscache.h       | 12 ++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index ce31408b1e47..9879183b55d8 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern void fscache_mark_page_cached(struct fscache_retrieval *op,
+				     struct page *page);
+
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9ec20dec3353..f4b6353543bf 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -135,14 +135,14 @@ struct fscache_cookie_def {
 	 */
 	void (*put_context)(void *cookie_netfs_data, void *context);
 
-	/* indicate pages that now have cache metadata retained
-	 * - this function should mark the specified pages as now being cached
-	 * - the pages will have been marked with PG_fscache before this is
+	/* indicate page that now have cache metadata retained
+	 * - this function should mark the specified page as now being cached
+	 * - the page will have been marked with PG_fscache before this is
 	 *   called, so this is optional
 	 */
-	void (*mark_pages_cached)(void *cookie_netfs_data,
-				  struct address_space *mapping,
-				  struct pagevec *cached_pvec);
+	void (*mark_page_cached)(void *cookie_netfs_data,
+				 struct address_space *mapping,
+				 struct page *page);
 
 	/* indicate the cookie is no longer cached
 	 * - this function is called when the backing store currently caching
-- 
cgit v1.2.3


From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads

Make fscache_relinquish_cookie() log a warning and wait if there are any
outstanding reads left on the cookie it was given.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 9879183b55d8..e3d6d939d959 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -301,6 +301,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_PENDING_FILL	3	/* T if pending initial fill on object */
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
+#define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
-- 
cgit v1.2.3


From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Fix operation state management and accounting

Fix the state management of internal fscache operations and the accounting of
what operations are in what states.

This is done by:

 (1) Give struct fscache_operation a enum variable that directly represents the
     state it's currently in, rather than spreading this knowledge over a bunch
     of flags, who's processing the operation at the moment and whether it is
     queued or not.

     This makes it easier to write assertions to check the state at various
     points and to prevent invalid state transitions.

 (2) Add an 'operation complete' state and supply a function to indicate the
     completion of an operation (fscache_op_complete()) and make things call
     it.  The final call to fscache_put_operation() can then check that an op
     in the appropriate state (complete or cancelled).

 (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better
     govern the state of an object:

	(a) The ->n_ops is now the number of extant operations on the object
	    and is now decremented by fscache_put_operation() only.

	(b) The ->n_in_progress is simply the number of objects that have been
	    taken off of the object's pending queue for the purposes of being
	    run.  This is decremented by fscache_op_complete() only.

	(c) The ->n_exclusive is the number of exclusive ops that have been
	    submitted and queued or are in progress.  It is decremented by
	    fscache_op_complete() and by fscache_cancel_op().

     fscache_put_operation() and fscache_operation_gc() now no longer try to
     clean up ->n_exclusive and ->n_in_progress.  That was leading to double
     decrements against fscache_cancel_op().

     fscache_cancel_op() now no longer decrements ->n_ops.  That was leading to
     double decrements against fscache_put_operation().

     fscache_submit_exclusive_op() now decides whether it has to queue an op
     based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter
     will persist in being true even after all preceding operations have been
     cancelled or completed.  Furthermore, if an object is active and there are
     runnable ops against it, there must be at least one op running.

 (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and
     provide a function to record completion of the pages as they complete.

     When n_pages reaches 0, the operation is deemed to be complete and
     fscache_op_complete() is called.

     Add calls to fscache_retrieval_complete() anywhere we've finished with a
     page we've been given to read or allocate for.  This includes places where
     we just return pages to the netfs for reading from the server and where
     accessing the cache fails and we discard the proposed netfs page.

The bugs in the unfixed state management manifest themselves as oopses like the
following where the operation completion gets out of sync with return of the
cookie by the netfs.  This is possible because the cache unlocks and returns
all the netfs pages before recording its completion - which means that there's
nothing to stop the netfs discarding them and returning the cookie.


FS-Cache: Cookie 'NFS.fh' still has outstanding reads
------------[ cut here ]------------
kernel BUG at fs/fscache/cookie.c:519!
invalid opcode: 0000 [#1] SMP
CPU 1
Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc

Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090                  /DG965RY
RIP: 0010:[<ffffffffa007050a>]  [<ffffffffa007050a>] __fscache_relinquish_cookie+0x170/0x343 [fscache]
RSP: 0018:ffff8800368cfb00  EFLAGS: 00010282
RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000
RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c
RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000
R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98
R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370
FS:  0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040)
Stack:
 ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0
 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0
 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91
Call Trace:
 [<ffffffffa00b2c91>] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs]
 [<ffffffffa008f25f>] nfs_clear_inode+0x3c/0x41 [nfs]
 [<ffffffffa0090df1>] nfs4_evict_inode+0x2f/0x33 [nfs]
 [<ffffffff810d8d47>] evict+0xa1/0x15c
 [<ffffffff810d8e2e>] dispose_list+0x2c/0x38
 [<ffffffff810d9ebd>] prune_icache_sb+0x28c/0x29b
 [<ffffffff810c56b7>] prune_super+0xd5/0x140
 [<ffffffff8109b615>] shrink_slab+0x102/0x1ab
 [<ffffffff8109d690>] balance_pgdat+0x2f2/0x595
 [<ffffffff8103e009>] ? process_timeout+0xb/0xb
 [<ffffffff8109dba3>] kswapd+0x270/0x289
 [<ffffffff8104c5ea>] ? __init_waitqueue_head+0x46/0x46
 [<ffffffff8109d933>] ? balance_pgdat+0x595/0x595
 [<ffffffff8104bf7a>] kthread+0x7f/0x87
 [<ffffffff813ad6b4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81026b98>] ? finish_task_switch+0x45/0xc0
 [<ffffffff813abcdd>] ? retint_restore_args+0xe/0xe
 [<ffffffff8104befb>] ? __init_kthread_worker+0x53/0x53
 [<ffffffff813ad6b0>] ? gs_change+0xb/0xb

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index e3d6d939d959..f5facd1d333f 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq;
 typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
 typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
 
+enum fscache_operation_state {
+	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */
+	FSCACHE_OP_ST_INITIALISED,	/* Op is initialised */
+	FSCACHE_OP_ST_PENDING,		/* Op is blocked from running */
+	FSCACHE_OP_ST_IN_PROGRESS,	/* Op is in progress */
+	FSCACHE_OP_ST_COMPLETE,		/* Op is complete */
+	FSCACHE_OP_ST_CANCELLED,	/* Op has been cancelled */
+	FSCACHE_OP_ST_DEAD		/* Op is now dead */
+};
+
 struct fscache_operation {
 	struct work_struct	work;		/* record for async ops */
 	struct list_head	pend_link;	/* link in object->pending_ops */
@@ -86,10 +96,10 @@ struct fscache_operation {
 #define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done be issuing thread, not pool */
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
-#define FSCACHE_OP_DEAD		6	/* op is now dead */
-#define FSCACHE_OP_DEC_READ_CNT	7	/* decrement object->n_reads on destruction */
-#define FSCACHE_OP_KEEP_FLAGS	0xc0	/* flags to keep when repurposing an op */
+#define FSCACHE_OP_DEC_READ_CNT	6	/* decrement object->n_reads on destruction */
+#define FSCACHE_OP_KEEP_FLAGS	0x0070	/* flags to keep when repurposing an op */
 
+	enum fscache_operation_state state;
 	atomic_t		usage;
 	unsigned		debug_id;	/* debugging ID */
 
@@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op,
 {
 	INIT_WORK(&op->work, fscache_op_work_func);
 	atomic_set(&op->usage, 1);
+	op->state = FSCACHE_OP_ST_INITIALISED;
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 	op->processor = processor;
 	op->release = release;
@@ -138,6 +150,7 @@ struct fscache_retrieval {
 	void			*context;	/* netfs read context (pinned) */
 	struct list_head	to_do;		/* list of things to be done by the backend */
 	unsigned long		start_time;	/* time at which retrieval started */
+	unsigned		n_pages;	/* number of pages to be retrieved */
 };
 
 typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 	fscache_enqueue_operation(&op->op);
 }
 
+/**
+ * fscache_retrieval_complete - Record (partial) completion of a retrieval
+ * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
+ */
+static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
+					      int n_pages)
+{
+	op->n_pages -= n_pages;
+	if (op->n_pages <= 0)
+		fscache_op_complete(&op->op);
+}
+
 /**
  * fscache_put_retrieval - Drop a reference to a retrieval operation
  * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
  *
  * Drop a reference to a retrieval operation.
  */
@@ -333,10 +360,10 @@ struct fscache_object {
 
 	int			debug_id;	/* debugging ID */
 	int			n_children;	/* number of child objects */
-	int			n_ops;		/* number of ops outstanding on object */
+	int			n_ops;		/* number of extant ops on object */
 	int			n_obj_ops;	/* number of object ops outstanding on object */
 	int			n_in_progress;	/* number of ops in progress */
-	int			n_exclusive;	/* number of exclusive ops queued */
+	int			n_exclusive;	/* number of exclusive ops queued or in progress */
 	atomic_t		n_reads;	/* number of read ops in progress */
 	spinlock_t		lock;		/* state and operations lock */
 
-- 
cgit v1.2.3


From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: FS-Cache: Provide proper invalidation

Provide a proper invalidation method rather than relying on the netfs retiring
the cookie it has and getting a new one.  The problem with this is that isn't
easy for the netfs to make sure that it has completed/cancelled all its
outstanding storage and retrieval operations on the cookie it is retiring.

Instead, have the cache provide an invalidation method that will cancel or wait
for all currently outstanding operations before invalidating the cache, and
will cause new operations to queue up behind that.  Whilst invalidation is in
progress, some requests will be rejected until the cache can stack a barrier on
the operation queue to cause new operations to be deferred behind it.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h |  8 +++++++-
 include/linux/fscache.h       | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index f5facd1d333f..1e454ad7a832 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -254,6 +254,9 @@ struct fscache_cache_ops {
 	/* store the updated auxiliary data on an object */
 	void (*update_object)(struct fscache_object *object);
 
+	/* Invalidate an object */
+	void (*invalidate_object)(struct fscache_operation *op);
+
 	/* discard the resources pinned by an object and effect retirement if
 	 * necessary */
 	void (*drop_object)(struct fscache_object *object);
@@ -329,6 +332,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
 #define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
+#define FSCACHE_COOKIE_INVALIDATING	7	/* T if cookie is being invalidated */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
@@ -345,6 +349,7 @@ struct fscache_object {
 		/* active states */
 		FSCACHE_OBJECT_AVAILABLE,	/* cleaning up object after creation */
 		FSCACHE_OBJECT_ACTIVE,		/* object is usable */
+		FSCACHE_OBJECT_INVALIDATING,	/* object is invalidating */
 		FSCACHE_OBJECT_UPDATING,	/* object is updating */
 
 		/* terminal states */
@@ -378,7 +383,8 @@ struct fscache_object {
 #define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
 #define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
 #define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EVENTS_MASK	0x7f	/* mask of all events*/
+#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
+#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index f4b6353543bf..7a086235da4b 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
+extern void __fscache_invalidate(struct fscache_cookie *);
+extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
 extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
 					struct page *,
 					fscache_rw_complete_t,
@@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie)
 		return -ENOBUFS;
 }
 
+/**
+ * fscache_invalidate - Notify cache that an object needs invalidation
+ * @cookie: The cookie representing the cache object
+ *
+ * Notify the cache that an object is needs to be invalidated and that it
+ * should abort any retrievals or stores it is doing on the cache.  The object
+ * is then marked non-caching until such time as the invalidation is complete.
+ *
+ * This can be called with spinlocks held.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_invalidate(cookie);
+}
+
+/**
+ * fscache_wait_on_invalidate - Wait for invalidation to complete
+ * @cookie: The cookie representing the cache object
+ *
+ * Wait for the invalidation of an object to complete.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_wait_on_invalidate(cookie);
+}
+
 /**
  * fscache_reserve_space - Reserve data space for a cached object
  * @cookie: The cookie representing the cache object
-- 
cgit v1.2.3


From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: VFS: Make more complete truncate operation available to CacheFiles

Make a more complete truncate operation available to CacheFiles (including
security checks and suchlike) so that it can use this to clear invalidated
cache files.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..017a15b707e2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1999,6 +1999,7 @@ struct filename {
 	bool			separate; /* should "name" be freed? */
 };
 
+extern long vfs_truncate(struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
-- 
cgit v1.2.3


From 36a02de5d7981435931d4608ee3e510b752e072b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Dec 2012 13:34:46 +0000
Subject: FS-Cache: Convert the object event ID #defines into an enum

Convert the fscache_object event IDs from #defines into an enum.  Also add an
extra label to the enum to carry the event count and redefine the event mask
in terms of that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 1e454ad7a832..73e68c8d5df4 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -337,6 +337,23 @@ struct fscache_cookie {
 
 extern struct fscache_cookie fscache_fsdef_index;
 
+/*
+ * Event list for fscache_object::{event_mask,events}
+ */
+enum {
+	FSCACHE_OBJECT_EV_REQUEUE,	/* T if object should be requeued */
+	FSCACHE_OBJECT_EV_UPDATE,	/* T if object should be updated */
+	FSCACHE_OBJECT_EV_INVALIDATE,	/* T if cache requested object invalidation */
+	FSCACHE_OBJECT_EV_CLEARED,	/* T if accessors all gone */
+	FSCACHE_OBJECT_EV_ERROR,	/* T if fatal error occurred during processing */
+	FSCACHE_OBJECT_EV_RELEASE,	/* T if netfs requested object release */
+	FSCACHE_OBJECT_EV_RETIRE,	/* T if netfs requested object retirement */
+	FSCACHE_OBJECT_EV_WITHDRAW,	/* T if cache requested object withdrawal */
+	NR_FSCACHE_OBJECT_EVENTS
+};
+
+#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1)
+
 /*
  * on-disk cache file or index handle
  */
@@ -376,15 +393,6 @@ struct fscache_object {
 	unsigned long		event_mask;	/* events this object is interested in */
 	unsigned long		events;		/* events to be processed by this object
 						 * (order is important - using fls) */
-#define FSCACHE_OBJECT_EV_REQUEUE	0	/* T if object should be requeued */
-#define FSCACHE_OBJECT_EV_UPDATE	1	/* T if object should be updated */
-#define FSCACHE_OBJECT_EV_CLEARED	2	/* T if accessors all gone */
-#define FSCACHE_OBJECT_EV_ERROR		3	/* T if fatal error occurred during processing */
-#define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
-#define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
-#define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
-#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
-- 
cgit v1.2.3


From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 Dec 2012 20:03:13 +0000
Subject: FS-Cache: Mark cancellation of in-progress operation

Mark as cancelled an operation that is in progress rather than pending at the
time it is cancelled, and call fscache_complete_op() to cancel an operation so
that blocked ops can be started.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 73e68c8d5df4..5dfa0aa216b6 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
-extern void fscache_op_complete(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *, bool);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 {
 	op->n_pages -= n_pages;
 	if (op->n_pages <= 0)
-		fscache_op_complete(&op->op);
+		fscache_op_complete(&op->op, true);
 }
 
 /**
-- 
cgit v1.2.3


From d30357f2f0ec0bfb67fd39f8f76d22d02d78631e Mon Sep 17 00:00:00 2001
From: Marco Stornelli <marco.stornelli@gmail.com>
Date: Sat, 15 Dec 2012 11:59:20 +0100
Subject: vfs: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..a0c5ba57ffc5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1565,7 +1565,6 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
-	void (*truncate) (struct inode *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
-- 
cgit v1.2.3


From 7898575fc81bd707ce0844cb06874d48e39bbe09 Mon Sep 17 00:00:00 2001
From: Marco Stornelli <marco.stornelli@gmail.com>
Date: Sat, 15 Dec 2012 12:00:02 +0100
Subject: mm: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f4f906190bd..63204078f72b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
-extern int vmtruncate(struct inode *inode, loff_t offset);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
-- 
cgit v1.2.3


From 471667391a92bf7bf2cd4ff31a3ad88e5dec934b Mon Sep 17 00:00:00 2001
From: Alessio Igor Bogani <abogani@kernel.org>
Date: Thu, 13 Dec 2012 12:22:39 +0100
Subject: vfs: Remove useless function prototypes

Commit 8e22cc88d68ca1a46d7d582938f979eb640ed30f removes the (un)lock_super
function definitions but forgets to remove their prototypes.

Signed-off-by: Alessio Igor Bogani <abogani@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0c5ba57ffc5..05cd238ad941 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb)
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
-/* not quite ready to be deprecated, but... */
-extern void lock_super(struct super_block *);
-extern void unlock_super(struct super_block *);
-
 /*
  * VFS helper functions..
  */
-- 
cgit v1.2.3


From b9d6ba94b875192ef5e2dab92d72beea33b83c3d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 20 Dec 2012 14:59:40 -0500
Subject: vfs: add a retry_estale helper function to handle retries on ESTALE

This function is expected to be called from path-based syscalls to help
them decide whether to try the lookup and call again in the event that
they got an -ESTALE return back on an earier try.

Currently, we only retry the call once on an ESTALE error, but in the
event that we decide that that's not enough in the future, we should be
able to change the logic in this helper without too much effort.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4bf19d8174ed..66542b644804 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/**
+ * retry_estale - determine whether the caller should retry an operation
+ * @error: the error that would currently be returned
+ * @flags: flags being used for next lookup attempt
+ *
+ * Check to see if the error code was -ESTALE, and then determine whether
+ * to retry the call based on whether "flags" already has LOOKUP_REVAL set.
+ *
+ * Returns true if the caller should try the operation again.
+ */
+static inline bool
+retry_estale(const long error, const unsigned int flags)
+{
+	return error == -ESTALE && !(flags & LOOKUP_REVAL);
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v1.2.3


From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 11 Dec 2012 12:10:06 -0500
Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg

Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags
passed in here are currently ignored.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 66542b644804..e998c030061d 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *,
 
 extern int kern_path(const char *, unsigned, struct path *);
 
-extern struct dentry *kern_path_create(int, const char *, struct path *, int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
+extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-- 
cgit v1.2.3


From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 20 Dec 2012 15:05:16 -0800
Subject: exec: do not leave bprm->interp on stack

If a series of scripts are executed, each triggering module loading via
unprintable bytes in the script header, kernel stack contents can leak
into the command line.

Normally execution of binfmt_script and binfmt_misc happens recursively.
However, when modules are enabled, and unprintable bytes exist in the
bprm->buf, execution will restart after attempting to load matching
binfmt modules.  Unfortunately, the logic in binfmt_script and
binfmt_misc does not expect to get restarted.  They leave bprm->interp
pointing to their local stack.  This means on restart bprm->interp is
left pointing into unused stack memory which can then be copied into the
userspace argv areas.

After additional study, it seems that both recursion and restart remains
the desirable way to handle exec with scripts, misc, and modules.  As
such, we need to protect the changes to interp.

This changes the logic to require allocation for any changes to the
bprm->interp.  To avoid adding a new kmalloc to every exec, the default
value is left as-is.  Only when passing through binfmt_script or
binfmt_misc does an allocation take place.

For a proof of concept, see DoTest.sh from:

   http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a4c2b565c835..bdf3965f0a29 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
+extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
-- 
cgit v1.2.3


From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 20 Dec 2012 15:05:42 -0800
Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned divisors

Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to
change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to
change in DIV_ROUND_CLOSEST.

Unfortunately, the C compiler converts divide operations with unsigned
divisors to unsigned, even if the dividend is signed and negative (for
example, -10 / 5U = 858993457).  The C standard says "If one operand has
unsigned int type, the other operand is converted to unsigned int", so
the compiler is not to blame.  As a result, DIV_ROUND_CLOSEST(0, 2U) and
similar operations now return bad values, since the automatic conversion
of expressions such as "0 - 2U/2" to unsigned was not taken into
account.

Fix by checking for the divisor variable type when deciding which
operation to perform.  This fixes DIV_ROUND_CLOSEST(0, 2U), but still
returns bad values for negative dividends divided by unsigned divisors.
Mark the latter case as unsupported.

One observed effect of this problem is that the s2c_hwmon driver reports
a value of 4198403 instead of 0 if the ADC reads 0.

Other impact is unpredictable.  Problem is seen if the divisor is an
unsigned variable or constant and the dividend is less than (divisor/2).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Juergen Beisert <jbe@pengutronix.de>
Tested-by: Juergen Beisert <jbe@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: <stable@vger.kernel.org>	[3.7.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d140e8fb075f..c566927efcbd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -77,13 +77,15 @@
 
 /*
  * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors.
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
+	(((typeof(x))-1) > 0 ||				\
+	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3


From d54eaa5a0fde0a202e4e91f200f818edcef15bee Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 21 Dec 2012 20:23:36 +0000
Subject: dm: prepare to support WRITE SAME

Allow targets to opt in to WRITE SAME support by setting
'num_write_same_requests' in the dm_target structure.

A dm device will only advertise WRITE SAME support if all its
targets and all its underlying devices support it.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38d27a10aa5d..d1f6cd8486f2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -205,6 +205,11 @@ struct dm_target {
 	 */
 	unsigned num_discard_requests;
 
+	/*
+	 * The number of WRITE SAME requests that will be submitted to the target.
+	 */
+	unsigned num_write_same_requests;
+
 	/* target specific data */
 	void *private;
 
-- 
cgit v1.2.3


From c0820cf5ad09522bdd9ff68e84841a09c9f339d8 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:38 +0000
Subject: dm: introduce per_bio_data

Introduce a field per_bio_data_size in struct dm_target.

Targets can set this field in the constructor. If a target sets this
field to a non-zero value, "per_bio_data_size" bytes of auxiliary data
are allocated for each bio submitted to the target. These data can be
used for any purpose by the target and help us improve performance by
removing some per-target mempools.

Per-bio data is accessed with dm_per_bio_data. The
argument data_size must be the same as the value per_bio_data_size in
dm_target.

If the target has a pointer to per_bio_data, it can get a pointer to
the bio with dm_bio_from_per_bio_data() function (data_size must be the
same as the value passed to dm_per_bio_data).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d1f6cd8486f2..6f0e73b4a80d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -210,6 +210,12 @@ struct dm_target {
 	 */
 	unsigned num_write_same_requests;
 
+	/*
+	 * The minimum number of extra bytes allocated in each bio for the
+	 * target to use.  dm_per_bio_data returns the data location.
+	 */
+	unsigned per_bio_data_size;
+
 	/* target specific data */
 	void *private;
 
@@ -246,6 +252,30 @@ struct dm_target_callbacks {
 	int (*congested_fn) (struct dm_target_callbacks *, int);
 };
 
+/*
+ * For bio-based dm.
+ * One of these is allocated for each bio.
+ * This structure shouldn't be touched directly by target drivers.
+ * It is here so that we can inline dm_per_bio_data and
+ * dm_bio_from_per_bio_data
+ */
+struct dm_target_io {
+	struct dm_io *io;
+	struct dm_target *ti;
+	union map_info info;
+	struct bio clone;
+};
+
+static inline void *dm_per_bio_data(struct bio *bio, size_t data_size)
+{
+	return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
+}
+
+static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
+{
+	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3


From ddbd658f6446a35e4d6ba84812fd71023320cae2 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:39 +0000
Subject: dm: move target request nr to dm_target_io

This patch moves target_request_nr from map_info to dm_target_io and
makes it accessible with dm_bio_get_target_request_nr.

This patch is a preparation for the next patch that removes map_info.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 6f0e73b4a80d..eb96ef6fd8b7 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
 	void *ptr;
 	unsigned long long ll;
-	unsigned target_request_nr;
 };
 
 /*
@@ -193,20 +192,21 @@ struct dm_target {
 	 * A number of zero-length barrier requests that will be submitted
 	 * to the target for the purpose of flushing cache.
 	 *
-	 * The request number will be placed in union map_info->target_request_nr.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 * It is a responsibility of the target driver to remap these requests
 	 * to the real underlying devices.
 	 */
 	unsigned num_flush_requests;
 
 	/*
-	 * The number of discard requests that will be submitted to the
-	 * target.  map_info->request_nr is used just like num_flush_requests.
+	 * The number of discard requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_discard_requests;
 
 	/*
 	 * The number of WRITE SAME requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_write_same_requests;
 
@@ -263,6 +263,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	union map_info info;
+	unsigned target_request_nr;
 	struct bio clone;
 };
 
@@ -276,6 +277,11 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
 	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
 }
 
+static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
+{
+	return container_of(bio, struct dm_target_io, clone)->target_request_nr;
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3


From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:41 +0000
Subject: dm: remove map_info

This patch removes map_info from bio-based device mapper targets.
map_info is still used for request-based targets.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index eb96ef6fd8b7..bf6afa2fc432 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  * = 1: simple remap complete
  * = 2: The target wants to push back the io
  */
-typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
-			  union map_info *map_context);
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
 
@@ -59,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error,
-			    union map_info *map_context);
+			    struct bio *bio, int error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, int error,
 				    union map_info *map_context);
-- 
cgit v1.2.3


From 30e6c9fa93cf3dbc7cc6df1d748ad25e4264545a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 20 Dec 2012 17:25:08 +0000
Subject: net: devnet_rename_seq should be a seqcount

Using a seqlock for devnet_rename_seq is not a good idea,
as device_rename() can sleep.

As we hold RTNL, we dont need a protection for writers,
and only need a seqcount so that readers can catch a change done
by a writer.

Bug added in commit c91f6df2db4972d3 (sockopt: Change getsockopt() of
SO_BINDTODEVICE to return an interface name)

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 02e0f6b156c3..c599e4782d45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1576,7 +1576,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
-extern seqlock_t	devnet_rename_seq;	/* Device rename lock */
+extern seqcount_t	devnet_rename_seq;	/* Device rename seq */
 
 
 #define for_each_netdev(net, d)		\
-- 
cgit v1.2.3


From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Dec 2012 13:29:52 -0500
Subject: ext4: fix deadlock in journal_unmap_buffer()

We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start.  We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY.  Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1be23d9fdacb..e30b66346942 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void		 jbd2_journal_set_triggers(struct buffer_head *,
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
-extern void	 jbd2_journal_invalidatepage(journal_t *,
+extern int	 jbd2_journal_invalidatepage(journal_t *,
 				struct page *, unsigned long);
 extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int	 jbd2_journal_stop(handle_t *);
-- 
cgit v1.2.3


From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@wwwdotorg.org>
Date: Mon, 24 Dec 2012 11:14:58 -0700
Subject: namei.h: include errno.h

This solves:

In file included from fs/ext3/symlink.c:20:0:
include/linux/namei.h: In function 'retry_estale':
include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function)

Signed-off-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index e998c030061d..5a5ff57ceed4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -2,6 +2,7 @@
 #define _LINUX_NAMEI_H
 
 #include <linux/dcache.h>
+#include <linux/errno.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
 
-- 
cgit v1.2.3


From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 21 Dec 2012 20:27:12 -0800
Subject: pidns: Stop pid allocation when init dies

Oleg pointed out that in a pid namespace the sequence.
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.

Can lead to processes attempting access their child reaper and
instead following a stale pointer.

That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.

Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.

Pointed-out-by:  Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid.h           | 1 +
 include/linux/pid_namespace.h | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44fb181..2381c973d897 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
+extern void disable_pid_allocation(struct pid_namespace *ns);
 
 /*
  * ns_of_pid() returns the pid namespace in which the specified pid was
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index bf285999273a..215e5e3dda10 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,7 +21,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
-	int nr_hashed;
+	unsigned int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -42,6 +42,8 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
+#define PIDNS_HASH_ADDING (1U << 31)
+
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
-- 
cgit v1.2.3


From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Sat, 1 Dec 2012 12:37:20 -0800
Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz

Otherwise it fails like this on cards like the Transcend 16GB SDHC card:

    mmc0: new SDHC card at address b368
    mmcblk0: mmc0:b368 SDC   15.0 GiB
    mmcblk0: error -110 sending status command, retrying
    mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0

Tested on my Lenovo x200 laptop.

[bhelgaas: changelog]
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Chris Ball <cjb@laptop.org>
CC: Manoj Iyer <manoj.iyer@canonical.com>
CC: stable@vger.kernel.org
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f8447376ddb..0eb65796bcb9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1568,6 +1568,7 @@
 #define PCI_DEVICE_ID_RICOH_RL5C476	0x0476
 #define PCI_DEVICE_ID_RICOH_RL5C478	0x0478
 #define PCI_DEVICE_ID_RICOH_R5C822	0x0822
+#define PCI_DEVICE_ID_RICOH_R5CE822	0xe822
 #define PCI_DEVICE_ID_RICOH_R5CE823	0xe823
 #define PCI_DEVICE_ID_RICOH_R5C832	0x0832
 #define PCI_DEVICE_ID_RICOH_R5C843	0x0843
-- 
cgit v1.2.3


From ad4b3fb7ff9940bcdb1e4cd62bd189d10fa636ba Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@cs.columbia.edu>
Date: Fri, 21 Dec 2012 13:03:50 -0500
Subject: mm: Fix PageHead when !CONFIG_PAGEFLAGS_EXTENDED

Unfortunately with !CONFIG_PAGEFLAGS_EXTENDED, (!PageHead) is false, and
(PageHead) is true, for tail pages.  If this is indeed the intended
behavior, which I doubt because it breaks cache cleaning on some ARM
systems, then the nomenclature is highly problematic.

This patch makes sure PageHead is only true for head pages and PageTail
is only true for tail pages, and neither is true for non-compound pages.

[ This buglet seems ancient - seems to have been introduced back in Apr
  2008 in commit 6a1e7f777f61: "pageflags: convert to the use of new
  macros".  And the reason nobody noticed is because the PageHead()
  tests are almost all about just sanity-checking, and only used on
  pages that are actual page heads.  The fact that the old code returned
  true for tail pages too was thus not really noticeable.   - Linus ]

Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
Acked-by:  Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: Steve Capper <Steve.Capper@arm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: stable@kernel.org  # 2.6.26+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5d13841604e..70473da47b3f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -362,7 +362,7 @@ static inline void ClearPageCompound(struct page *page)
  * pages on the LRU and/or pagecache.
  */
 TESTPAGEFLAG(Compound, compound)
-__PAGEFLAG(Head, compound)
+__SETPAGEFLAG(Head, compound)  __CLEARPAGEFLAG(Head, compound)
 
 /*
  * PG_reclaim is used in combination with PG_compound to mark the
@@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound)
  * PG_compound & PG_reclaim	=> Tail page
  * PG_compound & ~PG_reclaim	=> Head page
  */
+#define PG_head_mask ((1L << PG_compound))
 #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
 
+static inline int PageHead(struct page *page)
+{
+	return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+}
+
 static inline int PageTail(struct page *page)
 {
 	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
-- 
cgit v1.2.3


From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 2 Jan 2013 02:04:23 -0800
Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str

Remove the unused argument (formerly no_context) from mpol_parse_str()
and from mpol_to_str().

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 9adc270de7ef..92bc9988a180 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 
 
 #ifdef CONFIG_TMPFS
-extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-			int no_context);
+extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -296,15 +295,13 @@ static inline void check_highest_zone(int k)
 }
 
 #ifdef CONFIG_TMPFS
-static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
-				int no_context)
+static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	return 1;	/* error */
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-				int no_context)
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 21 Dec 2012 23:10:25 +0000
Subject: mm: mempolicy: Convert shared_policy mutex to spinlock

Sasha was fuzzing with trinity and reported the following problem:

  BUG: sleeping function called from invalid context at kernel/mutex.c:269
  in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main
  2 locks held by trinity-main/6361:
   #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff810aa314>] __do_page_fault+0x1e4/0x4f0
   #1:  (&(&mm->page_table_lock)->rlock){+.+...}, at: [<ffffffff8122f017>] handle_pte_fault+0x3f7/0x6a0
  Pid: 6361, comm: trinity-main Tainted: G        W
  3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74
  Call Trace:
    __might_sleep+0x1c3/0x1e0
    mutex_lock_nested+0x29/0x50
    mpol_shared_policy_lookup+0x2e/0x90
    shmem_get_policy+0x2e/0x30
    get_vma_policy+0x5a/0xa0
    mpol_misplaced+0x41/0x1d0
    handle_pte_fault+0x465/0x6a0

This was triggered by a different version of automatic NUMA balancing
but in theory the current version is vunerable to the same problem.

do_numa_page
  -> numa_migrate_prep
    -> mpol_misplaced
      -> get_vma_policy
        -> shmem_get_policy

It's very unlikely this will happen as shared pages are not marked
pte_numa -- see the page_mapcount() check in change_pte_range() -- but
it is possible.

To address this, this patch restores sp->lock as originally implemented
by Kosaki Motohiro.  In the path where get_vma_policy() is called, it
should not be calling sp_alloc() so it is not necessary to treat the PTL
specially.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 92bc9988a180..0d7df39a5885 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -123,7 +123,7 @@ struct sp_node {
 
 struct shared_policy {
 	struct rb_root root;
-	struct mutex mutex;
+	spinlock_t lock;
 };
 
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
-- 
cgit v1.2.3


From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 2 Jan 2013 15:12:55 +0000
Subject: UAPI: Remove empty Kbuild files

Empty files can get deleted by the patch program, so remove empty Kbuild
files and their links from the parent Kbuilds.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild      | 5 -----
 include/linux/hdlc/Kbuild | 0
 include/linux/hsi/Kbuild  | 0
 include/linux/raid/Kbuild | 0
 include/linux/usb/Kbuild  | 0
 5 files changed, 5 deletions(-)
 delete mode 100644 include/linux/Kbuild
 delete mode 100644 include/linux/hdlc/Kbuild
 delete mode 100644 include/linux/hsi/Kbuild
 delete mode 100644 include/linux/raid/Kbuild
 delete mode 100644 include/linux/usb/Kbuild

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
deleted file mode 100644
index 7fe2dae251e5..000000000000
--- a/include/linux/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += raid/
-header-y += usb/
diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
-- 
cgit v1.2.3


From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:02:05 -0800
Subject: pstore: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from the pstore filesystem.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Anton Vorontsov <cbouatmailru@gmail.com>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pstore_ram.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 098d2a838296..cb6ab5feab67 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -46,9 +46,8 @@ struct persistent_ram_zone {
 	size_t old_log_size;
 };
 
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
-							  size_t size, u32 sig,
-							  int ecc_size);
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+					       u32 sig, int ecc_size);
 void persistent_ram_free(struct persistent_ram_zone *prz);
 void persistent_ram_zap(struct persistent_ram_zone *prz);
 
-- 
cgit v1.2.3


From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:12:59 -0800
Subject: Drivers: bcma: remove __dev* attributes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit, __devexit_p, and __devexit
from these drivers.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: "Rafał Miłecki" <zajec5@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/bcma/bcma_driver_gmac_cmn.h | 2 +-
 include/linux/bcma/bcma_driver_pci.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h
index def894b83b0d..4dd1f33e36a2 100644
--- a/include/linux/bcma/bcma_driver_gmac_cmn.h
+++ b/include/linux/bcma/bcma_driver_gmac_cmn.h
@@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn {
 #define gmac_cmn_write32(gc, offset, val)	bcma_write32((gc)->core, offset, val)
 
 #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN
-extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
+extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
 #else
 static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { }
 #endif
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 41da581e1612..c48d98d27b77 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -214,7 +214,7 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
-extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc);
+extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
 extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
 				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend);
-- 
cgit v1.2.3


From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:15:49 -0800
Subject: include: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from some include files that
were previously missed.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ata_platform.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index fe9989636b62..b9fde17f767c 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -15,12 +15,12 @@ struct pata_platform_info {
 	unsigned int irq_flags;
 };
 
-extern int __devinit __pata_platform_probe(struct device *dev,
-					   struct resource *io_res,
-					   struct resource *ctl_res,
-					   struct resource *irq_res,
-					   unsigned int ioport_shift,
-					   int __pio_mask);
+extern int __pata_platform_probe(struct device *dev,
+				 struct resource *io_res,
+				 struct resource *ctl_res,
+				 struct resource *irq_res,
+				 unsigned int ioport_shift,
+				 int __pio_mask);
 
 /*
  * Marvell SATA private data
-- 
cgit v1.2.3


From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:50 -0800
Subject: ipc: add sysctl to specify desired next object id

Add 3 new variables and sysctls to tune them (by one "next_id" variable
for messages, semaphores and shared memory respectively).  This variable
can be used to set desired id for next allocated IPC object.  By default
it's equal to -1 and old behaviour is preserved.  If this variable is
non-negative, then desired idr will be extracted from it and used as a
start value to search for free IDR slot.

Notes:

1) this patch doesn't guarantee that the new object will have desired
   id.  So it's up to user space how to handle new object with wrong id.

2) After a sucessful id allocation attempt, "next_id" will be set back
   to -1 (if it was non-negative).

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index fe771978e877..ae221a7b5092 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -24,6 +24,7 @@ struct ipc_ids {
 	unsigned short seq_max;
 	struct rw_semaphore rw_mutex;
 	struct idr ipcs_idr;
+	int next_id;
 };
 
 struct ipc_namespace {
-- 
cgit v1.2.3


From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:52 -0800
Subject: ipc: message queue receive cleanup

Move all message related manipulation into one function msg_fill().
Actually, two functions because of the compat one.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 7a4b9e97d29a..fc5743a554e6 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -34,7 +34,8 @@ struct msg_queue {
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
-extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-			size_t msgsz, long msgtyp, int msgflg);
+extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+		      int msgflg,
+		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
 
 #endif /* _LINUX_MSG_H */
-- 
cgit v1.2.3


From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:56 -0800
Subject: selftests: IPC message queue copy feature test

This test can be used to check wheither kernel supports IPC message queue
copy and restore features (required by CRIU project).

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index fc5743a554e6..391af8d11cce 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
 extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 		      int msgflg,
-		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
+		      long (*msg_fill)(void __user *, struct msg_msg *,
+				       size_t));
 
 #endif /* _LINUX_MSG_H */
-- 
cgit v1.2.3


From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Fri, 4 Jan 2013 15:35:08 -0800
Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages

Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever
problem") added an isolated pageblocks counter (nr_pageblock_isolate in
struct zone) and used it to adjust free pages counter in
zone_watermark_ok_safe() to prevent kswapd looping forever problem.

Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages")
fixed accounting of isolated pages in global free pages counter.  It
made the previous zone_watermark_ok_safe() fix unnecessary and
potentially harmful (cause now isolated pages may be accounted twice
making free pages counter incorrect).

This patch removes the special isolated pageblocks counter altogether
which fixes zone_watermark_ok_safe() free pages check.

Reported-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4bec5be82cab..73b64a38b984 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -503,14 +503,6 @@ struct zone {
 	 * rarely used fields:
 	 */
 	const char		*name;
-#ifdef CONFIG_MEMORY_ISOLATION
-	/*
-	 * the number of MIGRATE_ISOLATE *pageblock*.
-	 * We need this for free page counting. Look at zone_watermark_ok_safe.
-	 * It's protected by zone->lock
-	 */
-	int		nr_pageblock_isolate;
-#endif
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
-- 
cgit v1.2.3


From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.c.dionne@gmail.com>
Date: Wed, 9 Jan 2013 14:16:30 +0000
Subject: cred: Remove tgcred pointer from struct cred

Commit 3a50597de863 ("KEYS: Make the session and process keyrings
per-thread") removed the definition of the thread_group_cred structure,
but left a now unused pointer in struct cred.

Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cred.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index abb2cd50f6b2..04421e825365 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -128,7 +128,6 @@ struct cred {
 	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
-	struct thread_group_cred *tgcred; /* thread-group shared credentials */
 #endif
 #ifdef CONFIG_SECURITY
 	void		*security;	/* subjective LSM security */
-- 
cgit v1.2.3


From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 10 Jan 2013 10:57:01 -0800
Subject: Remove __dev* markings from init.h

Now that all in-kernel users of __dev* are gone, let's remove them from
init.h to keep them from popping up again and again.

Thanks to Bill Pemberton for doing all of the hard work to make removal
of this possible.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index a799273714ac..10ed4f436458 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -93,14 +93,6 @@
 
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
-/* Used for HOTPLUG, but that is always enabled now, so just make them noops */
-#define __devinit
-#define __devinitdata
-#define __devinitconst
-#define __devexit
-#define __devexitdata
-#define __devexitconst
-
 /* Used for HOTPLUG_CPU */
 #define __cpuinit        __section(.cpuinit.text) __cold notrace
 #define __cpuinitdata    __section(.cpuinit.data)
@@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline);
 #define __INITRODATA_OR_MODULE __INITRODATA
 #endif /*CONFIG_MODULES*/
 
-/* Functions marked as __devexit may be discarded at kernel link time, depending
-   on config options.  Newer versions of binutils detect references from
-   retained sections to discarded sections and flag an error.  Pointers to
-   __devexit functions must use __devexit_p(function_name), the wrapper will
-   insert either the function_name or NULL, depending on the config options.
- */
-#if defined(MODULE) || defined(CONFIG_HOTPLUG)
-#define __devexit_p(x) x
-#else
-#define __devexit_p(x) NULL
-#endif
-
 #ifdef MODULE
 #define __exit_p(x) x
 #else
-- 
cgit v1.2.3


From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001
From: David Decotigny <decot@googlers.com>
Date: Fri, 11 Jan 2013 14:31:36 -0800
Subject: lib: cpu_rmap: avoid flushing all workqueues

In some cases, free_irq_cpu_rmap() is called while holding a lock (eg
rtnl).  This can lead to deadlocks, because it invokes
flush_scheduled_work() which ends up waiting for whole system workqueue
to flush, but some pending works might try to acquire the lock we are
already holding.

This commit uses reference-counting to replace
irq_run_affinity_notifiers().  It also removes
irq_run_affinity_notifiers() altogether.

[akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()]
Signed-off-by: David Decotigny <decot@googlers.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu_rmap.h  | 13 ++++---------
 include/linux/interrupt.h |  5 -----
 2 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index ac3bbb5b9502..1739510d8994 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -13,9 +13,11 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/kref.h>
 
 /**
  * struct cpu_rmap - CPU affinity reverse-map
+ * @refcount: kref for object
  * @size: Number of objects to be reverse-mapped
  * @used: Number of objects added
  * @obj: Pointer to array of object pointers
@@ -23,6 +25,7 @@
  *      based on affinity masks
  */
 struct cpu_rmap {
+	struct kref	refcount;
 	u16		size, used;
 	void		**obj;
 	struct {
@@ -33,15 +36,7 @@ struct cpu_rmap {
 #define CPU_RMAP_DIST_INF 0xffff
 
 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
-
-/**
- * free_cpu_rmap - free CPU affinity reverse-map
- * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
- */
-static inline void free_cpu_rmap(struct cpu_rmap *rmap)
-{
-	kfree(rmap);
-}
+extern int cpu_rmap_put(struct cpu_rmap *rmap);
 
 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
 extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e4e6170f43a..5fa5afeeb759 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -268,11 +268,6 @@ struct irq_affinity_notify {
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-static inline void irq_run_affinity_notifiers(void)
-{
-	flush_scheduled_work();
-}
-
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
-- 
cgit v1.2.3


From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 11 Jan 2013 14:31:56 -0800
Subject: lockdep, rwsem: provide down_write_nest_lock()

down_write_nest_lock() provides a means to annotate locking scenario
where an outer lock is guaranteed to serialize the order nested locks
are being acquired.

This is analogoue to already existing mutex_lock_nest_lock() and
spin_lock_nest_lock().

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mel Gorman <mel@csn.ul.ie>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 3 +++
 include/linux/rwsem.h   | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 00e46376e28f..2bca44b0893c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 2, NULL, i)
 # else
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 1, NULL, i)
 # endif
 # define rwsem_release(l, n, i)			lock_release(l, n, i)
 #else
 # define rwsem_acquire(l, s, t, i)		do { } while (0)
+# define rwsem_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define rwsem_acquire_read(l, s, t, i)		do { } while (0)
 # define rwsem_release(l, n, i)			do { } while (0)
 #endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 54bd7cd7ecbd..413cc11e414a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem);
  */
 extern void down_read_nested(struct rw_semaphore *sem, int subclass);
 extern void down_write_nested(struct rw_semaphore *sem, int subclass);
+extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock);
+
+# define down_write_nest_lock(sem, nest_lock)			\
+do {								\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
+	_down_write_nest_lock(sem, &(nest_lock)->dep_map);	\
+} while (0);
+
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
-- 
cgit v1.2.3


From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Jan 2013 14:32:05 -0800
Subject: audit: create explicit AUDIT_SECCOMP event type

The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1
could only kill a process.  While we still want to make sure an audit
record is forced on a kill, this should use a separate record type since
seccomp mode 2 introduces other behaviors.

In the case of "handled" behaviors (process wasn't killed), only emit a
record if the process is under inspection.  This change also fixes
userspace examination of seccomp audit events, since it was considered
malformed due to missing fields of the AUDIT_ANOM_ABEND event type.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Julien Tinnes <jln@google.com>
Acked-by: Will Drewry <wad@chromium.org>
Acked-by: Steve Grubb <sgrubb@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bce729afbcf9..9d5104d7aba9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -157,7 +157,8 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (unlikely(!audit_dummy_context()))
+	/* Force a record to be reported if a signal was delivered. */
+	if (signr || unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
-- 
cgit v1.2.3


From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 11 Jan 2013 14:32:13 -0800
Subject: linux/audit.h: move ptrace.h include to kernel header

While the kernel internals want pt_regs (and so it includes
linux/ptrace.h), the user version of audit.h does not need it.  So move
the include out of the uapi version.

This avoids issues where people want the audit defines and userland
ptrace api.  Including both the kernel ptrace and the userland ptrace
headers can easily lead to failure.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d5104d7aba9..5a6d718adf34 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -24,6 +24,7 @@
 #define _LINUX_AUDIT_H_
 
 #include <linux/sched.h>
+#include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
 
 struct audit_sig_info {
-- 
cgit v1.2.3


From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 11 Jan 2013 14:32:16 -0800
Subject: mm: compaction: partially revert capture of suitable high-order page

Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when
waiting for POLLIN on a local TCP socket.  It was easier to trigger if
there was disk IO and dirty pages at the same time and he bisected it to
commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page
immediately when it is made available").

The intention of that patch was to improve high-order allocations under
memory pressure after changes made to reclaim in 3.6 drastically hurt
THP allocations but the approach was flawed.  For Eric, the problem was
that page->pfmemalloc was not being cleared for captured pages leading
to a poor interaction with swap-over-NFS support causing the packets to
be dropped.  However, I identified a few more problems with the patch
including the fact that it can increase contention on zone->lock in some
cases which could result in async direct compaction being aborted early.

In retrospect the capture patch took the wrong approach.  What it should
have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it
was allocating for THP and avoided races that way.  While the patch was
showing to improve allocation success rates at the time, the benefit is
marginal given the relative complexity and it should be revisited from
scratch in the context of the other reclaim-related changes that have
taken place since the patch was first written and tested.  This patch
partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a
suitable high-order page immediately when it is made available").

Reported-and-tested-by: Eric Wong <normalperson@yhbt.net>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 4 ++--
 include/linux/mm.h         | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 6ecb6dc2f303..cc7bddeaf553 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended, struct page **page);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63204078f72b..66e2f7c61e5c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
-int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function.  Provide a
-- 
cgit v1.2.3


From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 11 Jan 2013 14:32:20 -0800
Subject: lib/rbtree.c: avoid the use of non-static __always_inline

lib/rbtree.c declared __rb_erase_color() as __always_inline void, and
then exported it with EXPORT_SYMBOL.

This was because __rb_erase_color() must be exported for augmented
rbtree users, but it must also be inlined into rb_erase() so that the
dummy callback can get optimized out of that call site.

(Actually with a modern compiler, none of the dummy callback functions
should even be generated as separate text functions).

The above usage is legal C, but it was unusual enough for some compilers
to warn about it.  This change makes things more explicit, with a static
__always_inline ____rb_erase_color function for use in rb_erase(), and a
separate non-inline __rb_erase_color function for use in
rb_erase_augmented call sites.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 2ac60c9cf644..fea49b5da12a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
 
-static __always_inline void
-rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-		   const struct rb_augment_callbacks *augment)
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		     const struct rb_augment_callbacks *augment)
 {
 	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
 	struct rb_node *parent, *rebalance;
@@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 	}
 
 	augment->propagate(tmp, NULL);
+	return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
 	if (rebalance)
 		__rb_erase_color(rebalance, root, augment->rotate);
 }
-- 
cgit v1.2.3


From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Thu, 10 Jan 2013 23:19:10 +0000
Subject: net, wireless: overwrite default_ethtool_ops

Since:

commit 2c60db037034d27f8c636403355d52872da92f81
Author: Eric Dumazet <edumazet@google.com>
Date:   Sun Sep 16 09:17:26 2012 +0000

    net: provide a default dev->ethtool_ops

wireless core does not correctly assign ethtool_ops.

After alloc_netdev*() call, some cfg80211 drivers provide they own
ethtool_ops, but some do not. For them, wireless core provide generic
cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call:

        if (!dev->ethtool_ops)
                dev->ethtool_ops = &cfg80211_ethtool_ops;

But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211
drivers without custom ethtool_ops), but points to &default_ethtool_ops.

In order to fix the problem, provide function which will overwrite
default_ethtool_ops and use it by wireless core.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c599e4782d45..9ef07d0868b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,6 +60,9 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
+extern void netdev_set_default_ethtool_ops(struct net_device *dev,
+					   const struct ethtool_ops *ops);
+
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
 #define NET_ADDR_RANDOM		1	/* address is generated randomly */
-- 
cgit v1.2.3


From 0d21b0e3477395e7ff2acc269f15df6e6a8d356d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 12 Jan 2013 11:38:44 +1030
Subject: module: add new state MODULE_STATE_UNFORMED.

You should never look at such a module, so it's excised from all paths
which traverse the modules list.

We add the state at the end, to avoid gratuitous ABI break (ksplice).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7760c6d344a3..1375ee3f03aa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -199,11 +199,11 @@ struct module_use {
 	struct module *source, *target;
 };
 
-enum module_state
-{
-	MODULE_STATE_LIVE,
-	MODULE_STATE_COMING,
-	MODULE_STATE_GOING,
+enum module_state {
+	MODULE_STATE_LIVE,	/* Normal state. */
+	MODULE_STATE_COMING,	/* Full formed, running module_init. */
+	MODULE_STATE_GOING,	/* Going away. */
+	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
 /**
-- 
cgit v1.2.3


From 803739d25c2343da6d2f95eebdcbc08bf67097d4 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Mon, 17 Dec 2012 23:18:59 +0800
Subject: [libata] replace sata_settings with devslp_timing

NCQ capability was used to check availability of SATA Settings page
from Identify Device Data Log, which contains DevSlp timing variables.
It does not work on some HDDs and leads to error messages.

IDENTIFY word 78 bit 5(Hardware Feature Control) can't work either
because it is only the sufficient condition of Identify Device data
log, not the necessary condition.

This patch replaced ata_device->sata_settings with ->devslp_timing
to only save DevSlp timing variables(8 bytes), instead of the whole
SATA Settings page(512 bytes).

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=51881

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Shane Huang <shane.huang@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h    | 8 +++++---
 include/linux/libata.h | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 408da9502177..8f7a3d68371a 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -297,10 +297,12 @@ enum {
 	ATA_LOG_SATA_NCQ	= 0x10,
 	ATA_LOG_SATA_ID_DEV_DATA  = 0x30,
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
-	ATA_LOG_DEVSLP_MDAT	  = 0x30,
+	ATA_LOG_DEVSLP_OFFSET	  = 0x30,
+	ATA_LOG_DEVSLP_SIZE	  = 0x08,
+	ATA_LOG_DEVSLP_MDAT	  = 0x00,
 	ATA_LOG_DEVSLP_MDAT_MASK  = 0x1F,
-	ATA_LOG_DEVSLP_DETO	  = 0x31,
-	ATA_LOG_DEVSLP_VALID	  = 0x37,
+	ATA_LOG_DEVSLP_DETO	  = 0x01,
+	ATA_LOG_DEVSLP_VALID	  = 0x07,
 	ATA_LOG_DEVSLP_VALID_MASK = 0x80,
 
 	/* READ/WRITE LONG (obsolete) */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83ba0ab2c915..649e5f86b5f0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -652,8 +652,8 @@ struct ata_device {
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
 	};
 
-	/* Identify Device Data Log (30h), SATA Settings (page 08h) */
-	u8			sata_settings[ATA_SECT_SIZE];
+	/* DEVSLP Timing Variables from Identify Device Data Log */
+	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
 
 	/* error history */
 	int			spdn_cnt;
-- 
cgit v1.2.3


From 8aef33a7cf40ca9da188e8578b2abe7267a38c52 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 15 Jan 2013 14:18:04 +0100
Subject: cpuidle: remove the power_specified field in the driver

We realized that the power usage field is never filled and when it
is filled for tegra, the power_specified flag is not set causing all
of these values to be reset when the driver is initialized with
set_power_state().

However, the power_specified flag can be simply removed under the
assumption that the states are always backward sorted, which is the
case with the current code.

This change allows the menu governor select function and the
cpuidle_play_dead() to be simplified.  Moreover, the
set_power_states() function can removed as it does not make sense
any more.

Drop the power_specified flag from struct cpuidle_driver and make
the related changes as described above.

As a consequence, this also fixes the bug where on the dynamic
C-states system, the power fields are not initialized.

[rjw: Changelog]
References: https://bugzilla.kernel.org/show_bug.cgi?id=42870
References: https://bugzilla.kernel.org/show_bug.cgi?id=43349
References: https://lkml.org/lkml/2012/10/16/518
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpuidle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3711b34dc4f9..24cd1037b6d6 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -126,9 +126,9 @@ struct cpuidle_driver {
 	struct module 		*owner;
 	int                     refcnt;
 
-	unsigned int		power_specified:1;
 	/* set to 1 to use the core cpuidle time keeping (for all states). */
 	unsigned int		en_core_tk_irqen:1;
+	/* states array must be ordered in decreasing power consumption */
 	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
 	int			state_count;
 	int			safe_state_index;
-- 
cgit v1.2.3


From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 15 Jan 2013 18:52:51 -0800
Subject: module, async: async_synchronize_full() on module init iff async is
 used

If the default iosched is built as module, the kernel may deadlock
while trying to load the iosched module on device probe if the probing
was running off async.  This is because async_synchronize_full() at
the end of module init ends up waiting for the async job which
initiated the module loading.

 async A				modprobe

 1. finds a device
 2. registers the block device
 3. request_module(default iosched)
					4. modprobe in userland
					5. load and init module
					6. async_synchronize_full()

Async A waits for modprobe to finish in request_module() and modprobe
waits for async A to finish in async_synchronize_full().

Because there's no easy to track dependency once control goes out to
userland, implementing properly nested flushing is difficult.  For
now, make module init perform async_synchronize_full() iff module init
has queued async jobs as suggested by Linus.

This avoids the described deadlock because iosched module doesn't use
async and thus wouldn't invoke async_synchronize_full().  This is
hacky and incomplete.  It will deadlock if async module loading nests;
however, this works around the known problem case and seems to be the
best of bad options.

For more details, please refer to the following thread.

  http://thread.gmane.org/gmane.linux.kernel/1420814

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Ming Lei <ming.lei@canonical.com>
Tested-by: Alex Riesen <raa.lkml@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..6fc8f45de4e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
+#define PF_USED_ASYNC	0x00004000	/* used async_schedule*(), used by module init */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
-- 
cgit v1.2.3


From e65b9ad222c280c031bc8d3642cc38dd3026fe06 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 15 Jan 2013 20:12:37 +0100
Subject: lockdep, rwsem: fix down_write_nest_lock() if
 !CONFIG_DEBUG_LOCK_ALLOC

Commit 1b963c81b145 ("lockdep, rwsem: provide down_write_nest_lock()")
contains a bug in a codepath when CONFIG_DEBUG_LOCK_ALLOC is disabled,
which causes down_read() to be called instead of down_write() by mistake
on such configurations.  Fix that.

Reported-and-tested-by: Andrew Clayton <andrew@digital-domain.net>
Reported-and-tested-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rwsem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 413cc11e414a..8da67d625e13 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -135,7 +135,7 @@ do {								\
 
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
-# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_write(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
-- 
cgit v1.2.3


From edea0d03ee5f0ae0051b6adb6681ebdf976b1ca4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 20 Jan 2013 20:25:47 +0100
Subject: ia64: kill thread_matches(), unexport ptrace_check_attach()

The ia64 function "thread_matches()" has no users since commit
e868a55c2a8c ("[IA64] remove find_thread_for_addr()").  Remove it.

This allows us to make ptrace_check_attach() static to kernel/ptrace.c,
which is good since we'll need to change the semantics of it and fix up
all the callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1693775ecfe8..89573a33ab3c 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,7 +45,6 @@ extern long arch_ptrace(struct task_struct *child, long request,
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern void ptrace_disable(struct task_struct *);
-extern int ptrace_check_attach(struct task_struct *task, bool ignore_state);
 extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
-- 
cgit v1.2.3


From 910ffdb18a6408e14febbb6e4b6840fd2c928c82 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Jan 2013 20:47:41 +0100
Subject: ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up()

Cleanup and preparation for the next change.

signal_wake_up(resume => true) is overused. None of ptrace/jctl callers
actually want to wakeup a TASK_WAKEKILL task, but they can't specify the
necessary mask.

Turn signal_wake_up() into signal_wake_up_state(state), reintroduce
signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up()
which adds __TASK_TRACED.

This way ptrace_signal_wake_up() can work "inside" ptrace_request()
even if the tracee doesn't have the TASK_WAKEKILL bit set.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..d2112477ff5e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2714,7 +2714,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig)
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
 
-extern void signal_wake_up(struct task_struct *t, int resume_stopped);
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
 
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
-- 
cgit v1.2.3