From 7e05484f02e1ea05a3aae0724d4df1e8a5a1920f Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: Tue, 16 Oct 2012 23:55:54 +1030 Subject: virtio-scsi: Add real 2-clause BSD license to header This is analogous to commit a1b383870a made by Rusty Russell to all the VirtIO headers at the time. This eases the use of the header as is by other OSes. Signed-off-by: Bryan Venteicher Acked-by: Paolo Bonzini Signed-off-by: Rusty Russell --- include/linux/virtio_scsi.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h index d6b4440387b7..4195b97a3def 100644 --- a/include/linux/virtio_scsi.h +++ b/include/linux/virtio_scsi.h @@ -1,7 +1,31 @@ +/* + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #ifndef _LINUX_VIRTIO_SCSI_H #define _LINUX_VIRTIO_SCSI_H -/* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ #define VIRTIO_SCSI_CDB_SIZE 32 #define VIRTIO_SCSI_SENSE_SIZE 96 -- cgit v1.2.3 From 1b4f59e356cc94929305bd107b7f38eec62715ad Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 22 Oct 2012 18:05:36 +0400 Subject: slub: Commonize slab_cache field in struct page Right now, slab and slub have fields in struct page to derive which cache a page belongs to, but they do it slightly differently. slab uses a field called slab_cache, that lives in the third double word. slub, uses a field called "slab", living outside of the doublewords area. Ideally, we could use the same field for this. Since slub heavily makes use of the doubleword region, there isn't really much room to move slub's slab_cache field around. Since slab does not have such strict placement restrictions, we can move it outside the doubleword area. The naming used by slab, "slab_cache", is less confusing, and it is preferred over slub's generic "slab". Signed-off-by: Glauber Costa Acked-by: Christoph Lameter CC: David Rientjes Signed-off-by: Pekka Enberg --- include/linux/mm_types.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 31f8a3af7d94..2fef4e720e79 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -128,10 +128,7 @@ struct page { }; struct list_head list; /* slobs list of pages */ - struct { /* slab fields */ - struct kmem_cache *slab_cache; - struct slab *slab_page; - }; + struct slab *slab_page; /* slab fields */ }; /* Remainder is not double word aligned */ @@ -146,7 +143,7 @@ struct page { #if USE_SPLIT_PTLOCKS spinlock_t ptl; #endif - struct kmem_cache *slab; /* SLUB: Pointer to slab */ + struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ }; -- cgit v1.2.3 From 6c9c6d6301287e369a754d628230fa6e50cdb74b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Oct 2012 11:08:06 -0600 Subject: dma-debug: New interfaces to debug dma mapping errors Add dma-debug interface debug_dma_mapping_error() to debug drivers that fail to check dma mapping errors on addresses returned by dma_map_single() and dma_map_page() interfaces. This interface clears a flag set by debug_dma_map_page() to indicate that dma_mapping_error() has been called by the driver. When driver does unmap, debug_dma_unmap() checks the flag and if this flag is still set, prints warning message that includes call trace that leads up to the unmap. This interface can be called from dma_mapping_error() routines to enable dma mapping error check debugging. Tested: Intel iommu and swiotlb (iommu=soft) on x86-64 with CONFIG_DMA_API_DEBUG enabled and disabled. Signed-off-by: Shuah Khan Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 171ad8aedc83..fc0e34ce038f 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page, int direction, dma_addr_t dma_addr, bool map_single); +extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single); @@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page, { } +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ +} + static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) -- cgit v1.2.3 From 242860a47a75b933a79a30f6a40bf4858f4a3ecc Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 19 Oct 2012 09:33:12 -0300 Subject: mm/sl[aou]b: Move common kmem_cache_size() to slab.h This function is identically defined in all three allocators and it's trivial to move it to slab.h Since now it's static, inline, header-defined function this patch also drops the EXPORT_SYMBOL tag. Cc: Pekka Enberg Cc: Matt Mackall Acked-by: Christoph Lameter Signed-off-by: Ezequiel Garcia Signed-off-by: Pekka Enberg --- include/linux/slab.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 83d1a1454b7e..743a10415122 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); -unsigned int kmem_cache_size(struct kmem_cache *); /* * Please use this macro to create slab caches. Simply specify the @@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +/* + * Determine the size of a slab object + */ +static inline unsigned int kmem_cache_size(struct kmem_cache *s) +{ + return s->object_size; +} + void __init kmem_cache_init_late(void); #endif /* _LINUX_SLAB_H */ -- cgit v1.2.3 From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: get parent spec for version 2 images Add support for getting the the information identifying the parent image for rbd images that have them. The child image holds a reference to its parent image specification structure. Create a new entry "parent" in /sys/bus/rbd/image/N/ to report the identifying information for the parent image, if any. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 0a99099801a4..15077db662ed 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -87,6 +87,8 @@ struct ceph_pg { * * lpgp_num -- as above. */ +#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ + #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 #define CEPH_PG_POOL_VERSION 2 -- cgit v1.2.3 From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: libceph: define ceph_pg_pool_name_by_id() Define and export function ceph_pg_pool_name_by_id() to supply the name of a pg pool whose id is given. This will be used by the next patch. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osdmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e88a620b9f8a..5ea57ba69320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); +extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); #endif -- cgit v1.2.3 From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 29 Aug 2012 10:10:10 -0400 Subject: exportfs: add FILEID_INVALID to indicate invalid fid_type This commit adds FILEID_INVALID = 0xff in fid_type to indicate invalid fid_type It avoids using magic number 255 Signed-off-by: Namjae Jeon Signed-off-by: Vivek Trivedi Signed-off-by: J. Bruce Fields --- include/linux/exportfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 12291a7ee275..0e1452546300 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -83,6 +83,11 @@ enum fid_type { * 64 bit parent inode number. */ FILEID_NILFS_WITH_PARENT = 0x62, + + /* + * Filesystems must not use 0xff file ID. + */ + FILEID_INVALID = 0xff, }; struct fid { -- cgit v1.2.3 From 2be975c6d920de989ff5e4bc09ffe87e59d94662 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 3 Nov 2012 12:16:12 -0700 Subject: Input: introduce managed input devices (add devres support) There is a demand from driver's writers to use managed devices framework for their drivers. Unfortunately up to this moment input devices did not provide support for managed devices and that lead to mixing two styles of resource management which usually introduced more bugs, such as manually unregistering input device but relying in devres to free interrupt handler which (unless device is properly shut off) can cause ISR to reference already freed memory. This change introduces devm_input_allocate_device() that will allocate managed instance of input device so that driver writers who prefer using devm_* framework do not have to mix 2 styles. Reviewed-by: Henrik Rydberg Reviewed-by: Tejun Heo Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index cab994ba6d91..5538cc09a4f5 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -112,6 +112,8 @@ struct input_value { * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list + * @devres_managed: indicates that devices is managed with devres framework + * and needs not be explicitly unregistered or freed. */ struct input_dev { const char *name; @@ -180,6 +182,8 @@ struct input_dev { unsigned int num_vals; unsigned int max_vals; struct input_value *vals; + + bool devres_managed; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) @@ -323,7 +327,8 @@ struct input_handle { struct list_head h_node; }; -struct input_dev *input_allocate_device(void); +struct input_dev __must_check *input_allocate_device(void); +struct input_dev __must_check *devm_input_allocate_device(struct device *); void input_free_device(struct input_dev *dev); static inline struct input_dev *input_get_device(struct input_dev *dev) -- cgit v1.2.3 From 800963fd598e2acbcd3a21a17e3ab3c185ad0d6a Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sat, 10 Nov 2012 00:32:36 -0800 Subject: Input: document new members of struct input_dev Fixes kernel-doc warnings for the members added in 3.7-rc1. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 5538cc09a4f5..82ce323b9986 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -112,6 +112,9 @@ struct input_value { * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list + * @num_vals: number of values queued in the current frame + * @max_vals: maximum number of values queued in a frame + * @vals: array of values queued in the current frame * @devres_managed: indicates that devices is managed with devres framework * and needs not be explicitly unregistered or freed. */ -- cgit v1.2.3 From 49839dc93970789cea46f5171cd7f6ec11af64c7 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 6 Nov 2012 16:31:32 +0000 Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints" This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc (ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints). This commit causes I2C timeouts to appear on several OMAP3430/3530-based boards: http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2 http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2 http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2 and appears to have been sent for merging before one of its prerequisites was merged: http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2 Signed-off-by: Paul Walmsley Acked-by: Jean Pihet Signed-off-by: Wolfram Sang --- include/linux/i2c-omap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index df804ba73e0b..92a0dc75bc74 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data { u32 clkrate; u32 rev; u32 flags; + void (*set_mpu_wkup_lat)(struct device *dev, long set); }; #endif -- cgit v1.2.3 From 2c88ab8c5af7d637d2a9d14b607fa6100fa64236 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Mon, 5 Nov 2012 17:53:39 +0530 Subject: ARM: i2c: omap: Remove the i207 errata flag The commit [i2c: omap: use revision check for OMAP_I2C_FLAG_APPLY_ERRATA_I207] uses the revision id instead of the flag. So the flag can be safely removed. Reviewed-by: Felipe Balbi Signed-off-by: Shubhrajyoti D Signed-off-by: Wolfram Sang --- include/linux/i2c-omap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 92a0dc75bc74..1b25c04f82d9 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -21,7 +21,6 @@ #define OMAP_I2C_FLAG_SIMPLE_CLOCK BIT(1) #define OMAP_I2C_FLAG_16BIT_DATA_REG BIT(2) #define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE BIT(3) -#define OMAP_I2C_FLAG_APPLY_ERRATA_I207 BIT(4) #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK BIT(5) #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK BIT(6) /* how the CPU address bus must be translated for I2C unit access */ -- cgit v1.2.3 From 1cf3d8b3d24cd383ddfd5442c83ec5c355ffc2f7 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 2 Oct 2012 16:57:57 +0000 Subject: powerpc+of: Add of node/property notification chain for adds and removes This patch moves the notification chain for updates to the device tree from the powerpc/pseries code to the base OF code. This makes this functionality available to all architectures. Additionally the notification chain is updated to allow notifications for property add/remove/update. To make this work a pointer to a new struct (of_prop_reconfig) is passed to the routines in the notification chain. The of_prop_reconfig property contains a pointer to the node containing the property and a pointer to the property itself. In the case of property updates, the property pointer refers to the new property. Signed-off-by: Nathan Fontenot Acked-by: Rob Herring Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- include/linux/of.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 72843b72a2b2..fb5d87b66e3e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -272,11 +273,24 @@ extern int prom_remove_property(struct device_node *np, struct property *prop); extern int prom_update_property(struct device_node *np, struct property *newprop); -#if defined(CONFIG_OF_DYNAMIC) /* For updating the device tree at runtime */ -extern void of_attach_node(struct device_node *); -extern void of_detach_node(struct device_node *); -#endif +#define OF_RECONFIG_ATTACH_NODE 0x0001 +#define OF_RECONFIG_DETACH_NODE 0x0002 +#define OF_RECONFIG_ADD_PROPERTY 0x0003 +#define OF_RECONFIG_REMOVE_PROPERTY 0x0004 +#define OF_RECONFIG_UPDATE_PROPERTY 0x0005 + +struct of_prop_reconfig { + struct device_node *dn; + struct property *prop; +}; + +extern int of_reconfig_notifier_register(struct notifier_block *); +extern int of_reconfig_notifier_unregister(struct notifier_block *); +extern int of_reconfig_notify(unsigned long, void *); + +extern int of_attach_node(struct device_node *); +extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) -- cgit v1.2.3 From 79d1c712958f94372482ad74578b00f44e744c12 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 2 Oct 2012 16:58:46 +0000 Subject: powerpc+of: Rename the drivers/of prom_* functions to of_* Rename the prom_*_property routines of the generic OF code to of_*_property. This brings them in line with the naming used by the rest of the OF code. Signed-off-by: Nathan Fontenot Acked-by: Geoff Levand Acked-by: Rob Herring Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- include/linux/of.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index fb5d87b66e3e..a093b2fe5dfb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -268,10 +268,9 @@ extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_machine_is_compatible(const char *compat); -extern int prom_add_property(struct device_node* np, struct property* prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop); +extern int of_add_property(struct device_node *np, struct property *prop); +extern int of_remove_property(struct device_node *np, struct property *prop); +extern int of_update_property(struct device_node *np, struct property *newprop); /* For updating the device tree at runtime */ #define OF_RECONFIG_ATTACH_NODE 0x0001 -- cgit v1.2.3 From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 14 Nov 2012 10:48:05 -0500 Subject: svcrpc: Revert "sunrpc/cache.h: replace simple_strtoul" Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace simple_strtoul" introduced new range-checking which could cause get_int to fail on unsigned integers too large to be represented as an int. We could parse them as unsigned instead--but it turns out svcgssd is actually passing down "-1" in some cases. Which is perhaps stupid, but there's nothing we can do about it now. So just revert back to the previous "sloppy" behavior that accepts either representation. Cc: stable@vger.kernel.org Reported-by: Sven Geggus Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f792794f6634..5dc9ee4d616e 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize); static inline int get_int(char **bpp, int *anint) { char buf[50]; + char *ep; + int rv; int len = qword_get(bpp, buf, sizeof(buf)); if (len < 0) @@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint) if (len == 0) return -ENOENT; - if (kstrtoint(buf, 0, anint)) + rv = simple_strtol(buf, &ep, 0); + if (*ep) return -EINVAL; + *anint = rv; return 0; } -- cgit v1.2.3 From fc05d5a30dc19dd4c6d161e551719a8c597c7890 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 4 Oct 2012 09:28:49 +0200 Subject: mtd: delete nomadik_nand driver The nomadik_nand driver is really just a subset of the FSMC NAND driver, and there are no users anymore so let's delete it. Signed-off-by: Linus Walleij Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Artem Bityutskiy --- include/linux/platform_data/mtd-nomadik-nand.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/platform_data/mtd-nomadik-nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h deleted file mode 100644 index c3c8254c22a5..000000000000 --- a/include/linux/platform_data/mtd-nomadik-nand.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_ARCH_NAND_H -#define __ASM_ARCH_NAND_H - -struct nomadik_nand_platform_data { - struct mtd_partition *parts; - int nparts; - int options; - int (*init) (void); - int (*exit) (void); -}; - -#define NAND_IO_DATA 0x40000000 -#define NAND_IO_CMD 0x40800000 -#define NAND_IO_ADDR 0x41000000 - -#endif /* __ASM_ARCH_NAND_H */ -- cgit v1.2.3 From 6d7b42a447f92eb3e7e410bbf62042693eb040f7 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Thu, 4 Oct 2012 15:14:16 +0200 Subject: mtd: fsmc_nand: pass the ale and cmd resource via resource Do not use the platform_data to pass resource and be smart in the drivers. Just pass it via resource Switch to devm_request_and_ioremap at the sametime Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Linus Walleij Reviewed-By: Vipin Kumar Signed-off-by: Artem Bityutskiy --- include/linux/mtd/fsmc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h index b20029221fb1..d6ed61ef451d 100644 --- a/include/linux/mtd/fsmc.h +++ b/include/linux/mtd/fsmc.h @@ -155,9 +155,6 @@ struct fsmc_nand_platform_data { unsigned int width; unsigned int bank; - /* CLE, ALE offsets */ - unsigned int cle_off; - unsigned int ale_off; enum access_mode mode; void (*select_bank)(uint32_t bank, uint32_t busw); -- cgit v1.2.3 From 2f25ae97fe4b424d88d765797c46456c7c0f1bae Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Tue, 9 Oct 2012 16:14:53 +0530 Subject: mtd: nand: Increase the ecc placement locations to 640 Few devices like H27UBG8T2CTR have a writesize/oobsize of 8KB/640B. This means that the maximum oobsize has gone up to 640 bytes and consequently the maximum ecc placement locations have also gone up to 640. Signed-off-by: Vipin Kumar Signed-off-by: Artem Bityutskiy --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 81d61e704599..f9ac2897b86b 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -98,7 +98,7 @@ struct mtd_oob_ops { }; #define MTD_MAX_OOBFREE_ENTRIES_LARGE 32 -#define MTD_MAX_ECCPOS_ENTRIES_LARGE 448 +#define MTD_MAX_ECCPOS_ENTRIES_LARGE 640 /* * Internal ECC layout control structure. For historical reasons, there is a * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained -- cgit v1.2.3 From 5de0b52ea8f8f5149502867acff2efb5efaf1fc2 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Sat, 13 Oct 2012 13:03:29 -0400 Subject: mtd: gpmi: remove unneccessary header The whole gpmi-nand driver has turned to pure devicetree supported. So the linux/mtd/gpmi-nand.h is not neccessary now. Just remove it, and move some macros to the gpmi-nand driver itself. Signed-off-by: Huang Shijie Signed-off-by: Artem Bityutskiy --- include/linux/mtd/gpmi-nand.h | 68 ------------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 include/linux/mtd/gpmi-nand.h (limited to 'include/linux') diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h deleted file mode 100644 index ed3c4e09f3d1..000000000000 --- a/include/linux/mtd/gpmi-nand.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef __MACH_MXS_GPMI_NAND_H__ -#define __MACH_MXS_GPMI_NAND_H__ - -/* The size of the resources is fixed. */ -#define GPMI_NAND_RES_SIZE 6 - -/* Resource names for the GPMI NAND driver. */ -#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "gpmi-nand" -#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME "GPMI NAND GPMI Interrupt" -#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "bch" -#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "bch" -#define GPMI_NAND_DMA_CHANNELS_RES_NAME "GPMI NAND DMA Channels" -#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "gpmi-dma" - -/** - * struct gpmi_nand_platform_data - GPMI NAND driver platform data. - * - * This structure communicates platform-specific information to the GPMI NAND - * driver that can't be expressed as resources. - * - * @platform_init: A pointer to a function the driver will call to - * initialize the platform (e.g., set up the pin mux). - * @min_prop_delay_in_ns: Minimum propagation delay of GPMI signals to and - * from the NAND Flash device, in nanoseconds. - * @max_prop_delay_in_ns: Maximum propagation delay of GPMI signals to and - * from the NAND Flash device, in nanoseconds. - * @max_chip_count: The maximum number of chips for which the driver - * should configure the hardware. This value most - * likely reflects the number of pins that are - * connected to a NAND Flash device. If this is - * greater than the SoC hardware can support, the - * driver will print a message and fail to initialize. - * @partitions: An optional pointer to an array of partition - * descriptions. - * @partition_count: The number of elements in the partitions array. - */ -struct gpmi_nand_platform_data { - /* SoC hardware information. */ - int (*platform_init)(void); - - /* NAND Flash information. */ - unsigned int min_prop_delay_in_ns; - unsigned int max_prop_delay_in_ns; - unsigned int max_chip_count; - - /* Medium information. */ - struct mtd_partition *partitions; - unsigned partition_count; -}; -#endif -- cgit v1.2.3 From e8a9d8f31c592eea89f1b0d3fd425e7a96944e88 Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 19 Oct 2012 12:15:34 +0200 Subject: mtd: sh_flctl: Minor cleanups Some small fixes to avoid sparse and smatch complain. Other cosmetic fixes as well. - Change of the type of the member index in struct sh_flctl from signed to unsigned. We use index by addressing array members, so unsigned is more concise here. Adapt functions relying on sh_flctl::index. - Remove a blurring cast in write_fiforeg(). - Apply consistent naming scheme when refering to the data buffer. - Shorten some unnecessarily verbose functions. - Remove spaces at start of lines. Signed-off-by: Bastian Hecht Signed-off-by: Artem Bityutskiy --- include/linux/mtd/sh_flctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 01e4b15b280e..481557688d05 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -147,7 +147,7 @@ struct sh_flctl { uint8_t done_buff[2048 + 64]; /* max size 2048 + 64 */ int read_bytes; - int index; + unsigned int index; int seqin_column; /* column in SEQIN cmd */ int seqin_page_addr; /* page_addr in SEQIN cmd */ uint32_t seqin_read_cmd; /* read cmd in SEQIN cmd */ -- cgit v1.2.3 From 83738d87e3a0a4096e1419a65b8228130d183df6 Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 19 Oct 2012 12:15:35 +0200 Subject: mtd: sh_flctl: Add DMA capabilty The code probes if DMA channels can get allocated and tears them down at removal/failure if needed. If available it uses them to transfer the data part (not ECC). On failure we fall back to PIO mode. Based on Guennadi Liakhovetski's code from the sh_mmcif driver. Signed-off-by: Bastian Hecht Reviewed-by: Guennadi Liakhovetski Signed-off-by: Artem Bityutskiy --- include/linux/mtd/sh_flctl.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 481557688d05..1c28f8879b1c 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -20,6 +20,7 @@ #ifndef __SH_FLCTL_H__ #define __SH_FLCTL_H__ +#include #include #include #include @@ -107,6 +108,7 @@ #define ESTERINTE (0x1 << 24) /* ECC error interrupt enable */ #define AC1CLR (0x1 << 19) /* ECC FIFO clear */ #define AC0CLR (0x1 << 18) /* Data FIFO clear */ +#define DREQ0EN (0x1 << 16) /* FLDTFIFODMA Request Enable */ #define ECERB (0x1 << 9) /* ECC error */ #define STERB (0x1 << 8) /* Status error */ #define STERINTE (0x1 << 4) /* Status error enable */ @@ -138,6 +140,8 @@ enum flctl_ecc_res_t { FL_TIMEOUT }; +struct dma_chan; + struct sh_flctl { struct mtd_info mtd; struct nand_chip chip; @@ -161,6 +165,11 @@ struct sh_flctl { unsigned hwecc:1; /* Hardware ECC (0 = disabled, 1 = enabled) */ unsigned holden:1; /* Hardware has FLHOLDCR and HOLDEN is set */ unsigned qos_request:1; /* QoS request to prevent deep power shutdown */ + + /* DMA related objects */ + struct dma_chan *chan_fifo0_rx; + struct dma_chan *chan_fifo0_tx; + struct completion dma_complete; }; struct sh_flctl_platform_data { @@ -170,6 +179,9 @@ struct sh_flctl_platform_data { unsigned has_hwecc:1; unsigned use_holden:1; + + unsigned int slave_id_fifo0_tx; + unsigned int slave_id_fifo0_rx; }; static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo) -- cgit v1.2.3 From 9ef525a9141b14d23613faad303cf48a20814f1b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 25 Oct 2012 09:43:10 -0400 Subject: mtd: Fix kernel-doc content to avoid warning. Add missing colons to fix kernel-doc generation warnings. Signed-off-by: Robert P. J. Day Signed-off-by: Artem Bityutskiy --- include/linux/mtd/nand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 24e915957e4f..9d8a6048aacd 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -471,8 +471,8 @@ struct nand_buffers { * non 0 if ONFI supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. - * @onfi_set_features [REPLACEABLE] set the features for ONFI nand - * @onfi_get_features [REPLACEABLE] get the features for ONFI nand + * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand + * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand * @ecclayout: [REPLACEABLE] the default ECC placement scheme * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash -- cgit v1.2.3 From 3e9ce49e0ef95e22790a74720f0068696b2477c9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Oct 2012 22:47:26 +0530 Subject: mtd: map: Fix compilation warning This patch is an attempt to fix following compilation warning. In file included from drivers/mtd/chips/cfi_cmdset_0001.c:35:0: drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wmaybe-uninitialized] I could have used uninitialized_var() too, but didn't used it as the final else part of map_word_load() is missing. So there is a chance that it might be passed uninitialized. Better initialize to zero. Signed-off-by: Viresh Kumar Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3595a0236b0f..56c7936e0c65 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -328,7 +328,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word static inline map_word map_word_load(struct map_info *map, const void *ptr) { - map_word r; + map_word r = {{0} }; if (map_bankwidth_is_1(map)) r.x[0] = *(unsigned char *)ptr; -- cgit v1.2.3 From ebd5ac165f2aaefb767c53112c2010b0ff3df688 Mon Sep 17 00:00:00 2001 From: Shinya Kuribayashi Date: Wed, 24 Oct 2012 19:58:10 +0900 Subject: i2c: i2c-sh_mobile: support I2C hardware block with a faster operating clock On newer SH-/R-Mobile SoCs, a clock supply to the I2C hardware block, which is used to generate the SCL clock output, is getting faster than before, while on the other hand, the SCL clock control registers, ICCH and ICCL, stay unchanged in 9-bit-wide (8+1). On such silicons, the internal SCL clock counter gets incremented every 2 clocks of the operating clock. This patch makes it configurable through platform data. Signed-off-by: Shinya Kuribayashi Signed-off-by: Wolfram Sang --- include/linux/i2c/i2c-sh_mobile.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h index beda7081aead..06e3089795fb 100644 --- a/include/linux/i2c/i2c-sh_mobile.h +++ b/include/linux/i2c/i2c-sh_mobile.h @@ -5,6 +5,7 @@ struct i2c_sh_mobile_platform_data { unsigned long bus_speed; + unsigned int clks_per_count; }; #endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From d611d41b46c96195b9a168a21992782458826e07 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:27 +0100 Subject: mtd: diskonchip: use inline functions for DocRead/DocWrite The diskonchip drivers traditionally use home-grown macros for doing MMIO accesses, which cause a lot of warnings, at least on ARM machines: drivers/mtd/devices/doc2000.c: In function 'doc_write': drivers/mtd/devices/doc2000.c:854:5: warning: value computed is not used [-Wunused-value] drivers/mtd/devices/doc2000.c: In function 'doc_erase': drivers/mtd/devices/doc2000.c:1123:5: warning: value computed is not used [-Wunused-value drivers/mtd/nand/diskonchip.c: In function 'doc2000_read_byte': drivers/mtd/nand/diskonchip.c:318:3: warning: value computed is not used [-Wunused-value] A nicer solution is to use the architecture-defined I/O accessors. Here, we use the __raw_readl/__raw_writel style, instead of the proper readl/writel ones, in order to preserve the odd semantics of the existing macros that have their own barrier implementation and no byte swap. It would be nice to fix this properly and use the correct accessors as well as make the word size independent from the architecture, but I guess the hardware is obsolete enough that we should better not mess the driver an more than necessary. Signed-off-by: Arnd Bergmann Signed-off-by: Artem Bityutskiy --- include/linux/mtd/doc2000.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h index 0f6fea73a1f6..407d1e556c39 100644 --- a/include/linux/mtd/doc2000.h +++ b/include/linux/mtd/doc2000.h @@ -92,12 +92,26 @@ * Others use readb/writeb */ #if defined(__arm__) -#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)))) -#define WriteDOC_(d, adr, reg) do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0) +static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg) +{ + return __raw_readl(addr + reg); +} +static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg) +{ + __raw_writel(data, addr + reg); + wmb(); +} #define DOC_IOREMAP_LEN 0x8000 #elif defined(__ppc__) -#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)))) -#define WriteDOC_(d, adr, reg) do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0) +static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg) +{ + return __raw_readw(addr + reg); +} +static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg) +{ + __raw_writew(data, addr + reg); + wmb(); +} #define DOC_IOREMAP_LEN 0x4000 #else #define ReadDOC_(adr, reg) readb((void __iomem *)(adr) + (reg)) -- cgit v1.2.3 From 5d27aa5af04f58f3020de1c224dcf8a62151fd58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:28 +0100 Subject: mtd: uninitialized variable warning in map.h The map_word_load() function initializes exactly as many words in the buffer as required, but gcc cannot figure this out and gives a misleading warning. Marking the local variable as uninitialized_var shuts up that warning. Without this patch, building acs5k_defconfig results in: drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_panic_write': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] Signed-off-by: Arnd Bergmann Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 56c7936e0c65..f6eb4332ac92 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -391,7 +391,7 @@ static inline map_word map_word_ff(struct map_info *map) static inline map_word inline_map_read(struct map_info *map, unsigned long ofs) { - map_word r; + map_word uninitialized_var(r); if (map_bankwidth_is_1(map)) r.x[0] = __raw_readb(map->virt + ofs); -- cgit v1.2.3 From 0857ba3c24c308f42a242fe8a1894772750230ce Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 18 Nov 2012 18:36:19 +0200 Subject: i2c: i2c-cbus-gpio: introduce driver Add i2c driver to enable access to devices behind CBUS on Nokia Internet Tablets. The patch also adds CBUS I2C configuration for N8x0 which is one of the users of this driver. Acked-by: Felipe Balbi Acked-by: Tony Lindgren Signed-off-by: Aaro Koskinen Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-cbus-gpio.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/platform_data/i2c-cbus-gpio.h (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h new file mode 100644 index 000000000000..6faa992a9502 --- /dev/null +++ b/include/linux/platform_data/i2c-cbus-gpio.h @@ -0,0 +1,27 @@ +/* + * i2c-cbus-gpio.h - CBUS I2C platform_data definition + * + * Copyright (C) 2004-2009 Nokia Corporation + * + * Written by Felipe Balbi and Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H +#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H + +struct i2c_cbus_platform_data { + int dat_gpio; + int clk_gpio; + int sel_gpio; +}; + +#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */ -- cgit v1.2.3 From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 11:02:55 -0500 Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments and results Don't store the target request and response sizes in the same variables used to store the server's replies to those targets. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9e76ee1adca..97c8f9191880 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -242,6 +242,9 @@ struct nfs4_session { struct nfs4_channel_attrs bc_attrs; struct nfs4_slot_table bc_slot_table; struct nfs_client *clp; + /* Create session arguments */ + unsigned int fc_target_max_rqst_sz; + unsigned int fc_target_max_resp_sz; }; #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:12:38 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time Store the renewal time inside the session slot instead. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a73ea89789d1..9cb1c63a70c2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -187,6 +187,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { + unsigned long renewal_time; u32 seq_nr; }; @@ -200,7 +201,6 @@ struct nfs4_sequence_res { struct nfs4_session *sr_session; struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ - unsigned long sr_renewal_time; u32 sr_status_flags; }; -- cgit v1.2.3 From 22a8578fca5a47e643bb4f70c232d0ec84db9e4e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Sat, 10 Nov 2012 13:08:20 -0300 Subject: mtd: mtd_blkdevs: Replace request handler kthread with a workqueue By replacing a kthread with a workqueue, the code is now a bit clearer. There's also a slight reduction of code size (numbers apply for x86): Before: text data bss dec hex filename 3248 36 0 3284 cd4 drivers/mtd/mtd_blkdevs.o After: text data bss dec hex filename 3150 36 0 3186 c72 drivers/mtd/mtd_blkdevs.o Due to lack of real hardware, tests have been performed on an emulated environment with mtdswap and mtdblock over nandsim devices. Some real testing should be done, before merging this patch. Signed-off-by: Ezequiel Garcia Signed-off-by: Artem Bityutskiy --- include/linux/mtd/blktrans.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index ed270bd2e4df..4eb0a50d0c55 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -23,6 +23,7 @@ #include #include #include +#include struct hd_geometry; struct mtd_info; @@ -43,7 +44,8 @@ struct mtd_blktrans_dev { struct kref ref; struct gendisk *disk; struct attribute_group *disk_attributes; - struct task_struct *thread; + struct workqueue_struct *wq; + struct work_struct work; struct request_queue *rq; spinlock_t queue_lock; void *priv; -- cgit v1.2.3 From 8d4b9e3182634d8b5afb5a144a8c6c24b187bcc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 12 Nov 2012 13:03:20 +0100 Subject: bcma: export PLL reading function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required by NAND flash driver for initializing wait counters. Signed-off-by: Rafał Miłecki Signed-off-by: Artem Bityutskiy --- include/linux/bcma/bcma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 4180eb78d575..4fb6bd7941d7 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -345,6 +345,7 @@ extern void bcma_core_set_clockmode(struct bcma_device *core, enum bcma_clkmode clkmode); extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status, bool on); +extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset); #define BCMA_DMA_TRANSLATION_MASK 0xC0000000 #define BCMA_DMA_TRANSLATION_NONE 0x00000000 #define BCMA_DMA_TRANSLATION_DMA32_CMT 0x40000000 /* Client Mode Translation for 32-bit DMA */ -- cgit v1.2.3 From 83af24027b3df1af5c5a9aa9adcdcfeb3429d3be Mon Sep 17 00:00:00 2001 From: "Philip, Avinash" Date: Wed, 21 Nov 2012 13:10:44 +0530 Subject: pwm: Device tree support for PWM polarity Add support for encoding PWM properties in bit encoded form with of_pwm_xlate_with_flags() function support. Platforms require platform specific PWM properties has to populate in 3rd cell of the pwm-specifier and PWM driver should also set .of_xlate support with this function. Currently PWM property polarity encoded in bit position 0 of the third cell in pwm-specifier. Signed-off-by: Philip, Avinash Acked-by: Grant Likely Signed-off-by: Thierry Reding --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 112b31436848..6d661f32e0e4 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -171,6 +171,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, + const struct of_phandle_args *args); + struct pwm_device *pwm_get(struct device *dev, const char *consumer); void pwm_put(struct pwm_device *pwm); -- cgit v1.2.3 From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:25:01 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session pointer Move the session pointer into the slot table, then have struct nfs4_slot point to that slot table. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 97c8f9191880..b0412873d29c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -209,6 +209,7 @@ struct nfs_server { /* Sessions */ #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) struct nfs4_slot_table { + struct nfs4_session *session; /* Parent session */ struct nfs4_slot *slots; /* seqid per slot */ unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cb1c63a70c2..0fd88ab0e814 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -187,6 +187,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { + struct nfs4_slot_table *table; unsigned long renewal_time; u32 seq_nr; }; @@ -198,7 +199,6 @@ struct nfs4_sequence_args { }; struct nfs4_sequence_res { - struct nfs4_session *sr_session; struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; -- cgit v1.2.3 From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:45:06 -0500 Subject: NFSv4.1: Label each entry in the session slot tables with its slot number Instead of doing slot table pointer gymnastics every time we want to know which slot we're using. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0fd88ab0e814..9c9b76c94b46 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -189,6 +189,7 @@ struct nfs4_channel_attrs { struct nfs4_slot { struct nfs4_slot_table *table; unsigned long renewal_time; + u32 slot_nr; u32 seq_nr; }; -- cgit v1.2.3 From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:58:36 -0500 Subject: NFSv4.1: Simplify struct nfs4_sequence_args too Replace the session pointer + slotid with a pointer to the allocated slot. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9c9b76c94b46..deb31bbbb857 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -194,8 +194,7 @@ struct nfs4_slot { }; struct nfs4_sequence_args { - struct nfs4_session *sa_session; - u32 sa_slotid; + struct nfs4_slot *sa_slot; u8 sa_cache_this; }; -- cgit v1.2.3 From 31fbcda71489d8cbe2b82819eaab4818524e3a49 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 28 Sep 2012 10:29:07 +0100 Subject: Input: bu21013_ts - move GPIO init and exit functions into the driver These GPIO init and exit functions have no place in platform data, they should be part of the driver instead, Acked-by: Arnd Bergmann Acked-by: Linus Walleij Signed-off-by: Lee Jones Signed-off-by: Dmitry Torokhov --- include/linux/input/bu21013.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h index 05e03284b92a..6230d76bde5d 100644 --- a/include/linux/input/bu21013.h +++ b/include/linux/input/bu21013.h @@ -9,13 +9,10 @@ /** * struct bu21013_platform_device - Handle the platform data - * @cs_en: pointer to the cs enable function - * @cs_dis: pointer to the cs disable function - * @irq_read_val: pointer to read the pen irq value function * @touch_x_max: touch x max * @touch_y_max: touch y max * @cs_pin: chip select pin - * @irq: irq pin + * @touch_pin: touch gpio pin * @ext_clk: external clock flag * @x_flip: x flip flag * @y_flip: y flip flag @@ -24,13 +21,10 @@ * This is used to handle the platform data */ struct bu21013_platform_device { - int (*cs_en)(int reset_pin); - int (*cs_dis)(int reset_pin); - int (*irq_read_val)(void); int touch_x_max; int touch_y_max; unsigned int cs_pin; - unsigned int irq; + unsigned int touch_pin; bool ext_clk; bool x_flip; bool y_flip; -- cgit v1.2.3 From 972deb4f49b5b6703d9c6117ba0aeda2180d4447 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Mon, 26 Nov 2012 15:25:11 +0530 Subject: i2c: omap: Remove the OMAP_I2C_FLAG_RESET_REGS_POSTIDLE flag The OMAP_I2C_FLAG_RESET_REGS_POSTIDLE is not used anymore in the i2c driver. Remove the flag. Signed-off-by: Shubhrajyoti D Reviewed-by: Felipe Balbi Signed-off-by: Wolfram Sang --- include/linux/i2c-omap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 1b25c04f82d9..babe0cf6d56b 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -20,7 +20,6 @@ #define OMAP_I2C_FLAG_NO_FIFO BIT(0) #define OMAP_I2C_FLAG_SIMPLE_CLOCK BIT(1) #define OMAP_I2C_FLAG_16BIT_DATA_REG BIT(2) -#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE BIT(3) #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK BIT(5) #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK BIT(6) /* how the CPU address bus must be translated for I2C unit access */ -- cgit v1.2.3 From 64b37b2a63eb2f80b65c7185f0013f8ffc637ae3 Mon Sep 17 00:00:00 2001 From: Matthieu CASTET Date: Tue, 6 Nov 2012 11:51:44 +0100 Subject: mtd: nand: add NAND_BUSWIDTH_AUTO to autodetect bus width The driver call nand_scan_ident in 8 bit mode, then readid or onfi detection are done (and detect bus width). The driver should update its bus width before calling nand_scan_tail. This work because readid and onfi are read work 8 byte mode. Note that nand_scan_ident send command (NAND_CMD_RESET, NAND_CMD_READID, NAND_CMD_PARAM), address and read data The ONFI specificication is not very clear for x16 device if high byte of address should be driven to 0, but according to [1] it should be ok to not drive it during autodetection. [1] 3.3.2. Target Initialization [...] The Read ID and Read Parameter Page commands only use the lower 8-bits of the data bus. The host shall not issue commands that use a word data width on x16 devices until the host determines the device supports a 16-bit data bus width in the parameter page. Signed-off-by: Matthieu CASTET Signed-off-by: Artem Bityutskiy --- include/linux/mtd/nand.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9d8a6048aacd..7ccb3c59ed60 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -219,6 +219,13 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 +/* + * Autodetect nand buswidth with readid/onfi. + * This suppose the driver will configure the hardware in 8 bits mode + * when calling nand_scan_ident, and update its configuration + * before calling nand_scan_tail. + */ +#define NAND_BUSWIDTH_AUTO 0x00080000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ -- cgit v1.2.3 From 72b15b6ae97796c5fac687addde5dbfab872cf94 Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Mon, 19 Nov 2012 19:05:50 -0600 Subject: iommu/omap: Migrate to hwmod framework Use hwmod data and device attributes to build and register an omap device for iommu driver. - Update the naming convention in isp module. - Remove unneeded check for number of resources, as this is now handled by omap_device and prevents driver from loading. - Now unused, remove platform device and resource data, handling of sysconfig register for softreset purposes, use default latency structure. - Use hwmod API for reset handling. Signed-off-by: Omar Ramirez Luna Tested-by: Ohad Ben-Cohen Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index c677b9f2fefa..ef2060d7eeb8 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#include + #define MMU_REG_SIZE 256 /** @@ -43,7 +45,11 @@ struct omap_mmu_dev_attr { struct iommu_platform_data { const char *name; const char *clk_name; - const int nr_tlb_entries; + const char *reset_name; + int nr_tlb_entries; u32 da_start; u32 da_end; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); }; -- cgit v1.2.3 From ebf7cda0f92effd8169b831fae81e9437dce1fef Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Mon, 19 Nov 2012 19:05:51 -0600 Subject: iommu/omap: Adapt to runtime pm Use runtime PM functionality interfaced with hwmod enable/idle functions, to replace direct clock operations and sysconfig handling. Due to reset sequence, pm_runtime_[get|put]_sync must be used, to avoid possible operations with the module under reset. Because of this and given that the driver uses spin_locks to protect their critical sections, we must use pm_runtime_irq_safe in order for the runtime ops to be happy, otherwise might_sleep_if checks in runtime framework will complain. The remaining pm_runtime out of iommu_enable and iommu_disable corresponds to paths that can be accessed through debugfs, some of them doesn't work if the module is not enabled first, but in future if the mmu is idled withouth freeing, these are needed to debug. Signed-off-by: Omar Ramirez Luna Tested-by: Ohad Ben-Cohen Acked-by: Tony Lindgren Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index ef2060d7eeb8..5b429c43a297 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -44,7 +44,6 @@ struct omap_mmu_dev_attr { struct iommu_platform_data { const char *name; - const char *clk_name; const char *reset_name; int nr_tlb_entries; u32 da_start; -- cgit v1.2.3 From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Dec 2012 16:11:13 -0500 Subject: svcrpc: don't byte-swap sk_reclen in place Byte-swapping in place is always a little dubious. Let's instead define this field to always be big-endian, and do the swapping on demand where we need it. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 92ad02f0dcc0..613cf42227aa 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,11 +26,21 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - u32 sk_reclen; /* length of record */ + __be32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; +static inline u32 svc_sock_reclen(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; +} + +static inline u32 svc_sock_final_rec(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; +} + /* * Function prototypes. */ -- cgit v1.2.3 From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Dec 2012 16:45:35 -0500 Subject: svcrpc: track rpc data length separately from sk_tcplen Keep a separate field, sk_datalen, that tracks only the data contained in a fragment, not including the fragment header. For now, this is always just max(0, sk_tcplen - 4), but after we allow multiple fragments sk_datalen will accumulate the total rpc data size while sk_tcplen only tracks progress receiving the current fragment. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 613cf42227aa..62fd1b756e99 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,8 +26,15 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - __be32 sk_reclen; /* length of record */ - u32 sk_tcplen; /* current read length */ + /* On-the-wire fragment header: */ + __be32 sk_reclen; + /* As we receive a record, this includes the length received so + * far (including the fragment header): */ + u32 sk_tcplen; + /* Total length of the data (not including fragment headers) + * received so far in the fragments making up this rpc: */ + u32 sk_datalen; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; -- cgit v1.2.3 From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 12:49:27 -0500 Subject: NFSv4.1: Ensure that the client tracks the server target_highest_slotid Dynamic slot allocation in NFSv4.1 depends on the client being able to track the server's target value for the highest slotid in the slot table. See the reference in Section 2.10.6.1 of RFC5661. To avoid ordering problems in the case where 2 SEQUENCE replies contain conflicting updates to this target value, we also introduce a generation counter, to track whether or not an RPC containing a SEQUENCE operation was launched before or after the last update. Also rename the nfs4_slot_table target_max_slots field to 'target_highest_slotid' to avoid confusion with a slot table size or number of slots. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 +++-- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b0412873d29c..57d406997def 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -217,8 +217,9 @@ struct nfs4_slot_table { u32 max_slots; /* # slots in table */ u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ - u32 target_max_slots; /* Set by CB_RECALL_SLOT as - * the new max_slots */ + u32 target_highest_slotid; /* Server max_slot target */ + unsigned long generation; /* Generation counter for + target_highest_slotid */ struct completion complete; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index deb31bbbb857..08c47db7417f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,6 +188,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { struct nfs4_slot_table *table; + unsigned long generation; unsigned long renewal_time; u32 slot_nr; u32 seq_nr; @@ -202,6 +203,7 @@ struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; + u32 sr_target_highest_slotid; }; struct nfs4_get_lease_time_args { -- cgit v1.2.3 From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 18:10:30 -0500 Subject: NFSv4.1: Reset the sequence number for slots that have been deallocated When the server tells us that it is dynamically resizing the session replay cache, we should reset the sequence number for those slots that have been deallocated. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 57d406997def..646e64bbff4c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -218,6 +218,7 @@ struct nfs4_slot_table { u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ u32 target_highest_slotid; /* Server max_slot target */ + u32 server_highest_slotid; /* Server highest slotid */ unsigned long generation; /* Generation counter for target_highest_slotid */ struct completion complete; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 08c47db7417f..3ddb08fba935 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -203,6 +203,7 @@ struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; + u32 sr_highest_slotid; u32 sr_target_highest_slotid; }; -- cgit v1.2.3 From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 14:45:48 -0500 Subject: NFSv4.1: Support dynamic resizing of the session slot table Allow the server to control the size of the session slot table by adjusting the value of sr_target_max_slots in the reply to the SEQUENCE operation. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 646e64bbff4c..30715508fade 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -215,6 +215,7 @@ struct nfs4_slot_table { spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ u32 max_slots; /* # slots in table */ + u32 max_slotid; /* Max allowed slotid value */ u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ u32 target_highest_slotid; /* Server max_slot target */ -- cgit v1.2.3 From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 19:49:20 -0500 Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly Instead of an array of slots, use a singly linked list of slots that can be dynamically appended to or shrunk. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ddb08fba935..44d256f6021c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,6 +188,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { struct nfs4_slot_table *table; + struct nfs4_slot *next; unsigned long generation; unsigned long renewal_time; u32 slot_nr; -- cgit v1.2.3 From c34309a45ea491e5f0c0d0af49ccfa018ff35fc1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 14:33:03 -0500 Subject: NFS: Remove unused function slot_idx Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 30715508fade..e707c1b69796 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -225,11 +225,6 @@ struct nfs4_slot_table { struct completion complete; }; -static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp) -{ - return sp - tbl->slots; -} - /* * Session related parameters */ -- cgit v1.2.3 From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 14:20:49 -0500 Subject: NFSv4.1: Move slot table and session struct definitions to nfs4session.h Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 49 ----------------------------------------------- include/linux/nfs_xdr.h | 11 +---------- 2 files changed, 1 insertion(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e707c1b69796..6c6ed153a9b4 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -198,53 +198,4 @@ struct nfs_server { #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) - -/* maximum number of slots to use */ -#define NFS4_DEF_SLOT_TABLE_SIZE (16U) -#define NFS4_MAX_SLOT_TABLE (256U) -#define NFS4_NO_SLOT ((u32)-1) - -#if IS_ENABLED(CONFIG_NFS_V4) - -/* Sessions */ -#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) -struct nfs4_slot_table { - struct nfs4_session *session; /* Parent session */ - struct nfs4_slot *slots; /* seqid per slot */ - unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ - spinlock_t slot_tbl_lock; - struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ - u32 max_slots; /* # slots in table */ - u32 max_slotid; /* Max allowed slotid value */ - u32 highest_used_slotid; /* sent to server on each SEQ. - * op for dynamic resizing */ - u32 target_highest_slotid; /* Server max_slot target */ - u32 server_highest_slotid; /* Server highest slotid */ - unsigned long generation; /* Generation counter for - target_highest_slotid */ - struct completion complete; -}; - -/* - * Session related parameters - */ -struct nfs4_session { - struct nfs4_sessionid sess_id; - u32 flags; - unsigned long session_state; - u32 hash_alg; - u32 ssv_len; - - /* The fore and back channel */ - struct nfs4_channel_attrs fc_attrs; - struct nfs4_slot_table fc_slot_table; - struct nfs4_channel_attrs bc_attrs; - struct nfs4_slot_table bc_slot_table; - struct nfs_client *clp; - /* Create session arguments */ - unsigned int fc_target_max_rqst_sz; - unsigned int fc_target_max_resp_sz; -}; - -#endif /* CONFIG_NFS_V4 */ #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 44d256f6021c..2076149db1a4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -185,16 +185,7 @@ struct nfs4_channel_attrs { u32 max_reqs; }; -/* nfs41 sessions slot seqid */ -struct nfs4_slot { - struct nfs4_slot_table *table; - struct nfs4_slot *next; - unsigned long generation; - unsigned long renewal_time; - u32 slot_nr; - u32 seq_nr; -}; - +struct nfs4_slot; struct nfs4_sequence_args { struct nfs4_slot *sa_slot; u8 sa_cache_this; -- cgit v1.2.3 From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 19:02:20 -0400 Subject: NFSv4: Clean up handling of privileged operations Privileged rpc calls are those that are run by the state recovery thread, in cases where we're trying to recover the system after a server reboot or a network partition. In those cases, we want to fence off all other rpc calls (see nfs4_begin_drain_session()) so that they don't end up using stateids or clientids that are in the process of being recovered. Prior to this patch, we had to set up special callback functions in order to declare an rpc call as being privileged. By adding a new field to the sequence arguments, this patch simplifies things considerably, and allows us to declare the rpc call as privileged before it is run. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2076149db1a4..baa673edb597 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,7 +188,8 @@ struct nfs4_channel_attrs { struct nfs4_slot; struct nfs4_sequence_args { struct nfs4_slot *sa_slot; - u8 sa_cache_this; + u8 sa_cache_this : 1, + sa_privileged : 1; }; struct nfs4_sequence_res { -- cgit v1.2.3 From 62ae082d883d167cdaa7895cf2972d85e178228a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Nov 2012 17:10:01 -0500 Subject: NFSv4: Reorder the XDR structures to put sequence at the top, not bottom Pre-condition for optimising the slot allocation and reintroducing FIFO behaviour. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 138 ++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index baa673edb597..a55abd499c21 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -205,8 +205,8 @@ struct nfs4_get_lease_time_args { }; struct nfs4_get_lease_time_res { - struct nfs_fsinfo *lr_fsinfo; struct nfs4_sequence_res lr_seq_res; + struct nfs_fsinfo *lr_fsinfo; }; #define PNFS_LAYOUT_MAXSIZE 4096 @@ -224,23 +224,23 @@ struct pnfs_layout_range { }; struct nfs4_layoutget_args { + struct nfs4_sequence_args seq_args; __u32 type; struct pnfs_layout_range range; __u64 minlength; __u32 maxcount; struct inode *inode; struct nfs_open_context *ctx; - struct nfs4_sequence_args seq_args; nfs4_stateid stateid; struct nfs4_layoutdriver_data layout; }; struct nfs4_layoutget_res { + struct nfs4_sequence_res seq_res; __u32 return_on_close; struct pnfs_layout_range range; __u32 type; nfs4_stateid stateid; - struct nfs4_sequence_res seq_res; struct nfs4_layoutdriver_data *layoutp; }; @@ -251,38 +251,38 @@ struct nfs4_layoutget { }; struct nfs4_getdevicelist_args { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fh; u32 layoutclass; - struct nfs4_sequence_args seq_args; }; struct nfs4_getdevicelist_res { - struct pnfs_devicelist *devlist; struct nfs4_sequence_res seq_res; + struct pnfs_devicelist *devlist; }; struct nfs4_getdeviceinfo_args { - struct pnfs_device *pdev; struct nfs4_sequence_args seq_args; + struct pnfs_device *pdev; }; struct nfs4_getdeviceinfo_res { - struct pnfs_device *pdev; struct nfs4_sequence_res seq_res; + struct pnfs_device *pdev; }; struct nfs4_layoutcommit_args { + struct nfs4_sequence_args seq_args; nfs4_stateid stateid; __u64 lastbytewritten; struct inode *inode; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_layoutcommit_res { + struct nfs4_sequence_res seq_res; struct nfs_fattr *fattr; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; int status; }; @@ -296,11 +296,11 @@ struct nfs4_layoutcommit_data { }; struct nfs4_layoutreturn_args { + struct nfs4_sequence_args seq_args; struct pnfs_layout_hdr *layout; struct inode *inode; nfs4_stateid stateid; __u32 layout_type; - struct nfs4_sequence_args seq_args; }; struct nfs4_layoutreturn_res { @@ -326,6 +326,7 @@ struct stateowner_id { * Arguments to the open call. */ struct nfs_openargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; struct nfs_seqid * seqid; int open_flags; @@ -346,10 +347,10 @@ struct nfs_openargs { const u32 * bitmask; const u32 * open_bitmap; __u32 claim; - struct nfs4_sequence_args seq_args; }; struct nfs_openres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info cinfo; @@ -364,7 +365,6 @@ struct nfs_openres { __u32 attrset[NFS4_BITMAP_SIZE]; struct nfs4_string *owner; struct nfs4_string *group_owner; - struct nfs4_sequence_res seq_res; __u32 access_request; __u32 access_supported; __u32 access_result; @@ -388,20 +388,20 @@ struct nfs_open_confirmres { * Arguments to the close call. */ struct nfs_closeargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; nfs4_stateid * stateid; struct nfs_seqid * seqid; fmode_t fmode; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_closeres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_fattr * fattr; struct nfs_seqid * seqid; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * * Arguments to the lock,lockt, and locku call. @@ -413,6 +413,7 @@ struct nfs_lowner { }; struct nfs_lock_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * lock_seqid; @@ -423,40 +424,39 @@ struct nfs_lock_args { unsigned char block : 1; unsigned char reclaim : 1; unsigned char new_lock_owner : 1; - struct nfs4_sequence_args seq_args; }; struct nfs_lock_res { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * lock_seqid; struct nfs_seqid * open_seqid; - struct nfs4_sequence_res seq_res; }; struct nfs_locku_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * seqid; nfs4_stateid * stateid; - struct nfs4_sequence_args seq_args; }; struct nfs_locku_res { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * seqid; - struct nfs4_sequence_res seq_res; }; struct nfs_lockt_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_lowner lock_owner; - struct nfs4_sequence_args seq_args; }; struct nfs_lockt_res { - struct file_lock * denied; /* LOCK, LOCKT failed */ struct nfs4_sequence_res seq_res; + struct file_lock * denied; /* LOCK, LOCKT failed */ }; struct nfs_release_lockowner_args { @@ -464,22 +464,23 @@ struct nfs_release_lockowner_args { }; struct nfs4_delegreturnargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_delegreturnres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the read call. */ struct nfs_readargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; @@ -487,20 +488,20 @@ struct nfs_readargs { __u32 count; unsigned int pgbase; struct page ** pages; - struct nfs4_sequence_args seq_args; }; struct nfs_readres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; __u32 count; int eof; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the write call. */ struct nfs_writeargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; @@ -510,7 +511,6 @@ struct nfs_writeargs { unsigned int pgbase; struct page ** pages; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_write_verifier { @@ -523,65 +523,65 @@ struct nfs_writeverf { }; struct nfs_writeres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; struct nfs_writeverf * verf; __u32 count; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the commit call. */ struct nfs_commitargs { + struct nfs4_sequence_args seq_args; struct nfs_fh *fh; __u64 offset; __u32 count; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_commitres { + struct nfs4_sequence_res seq_res; struct nfs_fattr *fattr; struct nfs_writeverf *verf; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Common arguments to the unlink call */ struct nfs_removeargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fh; struct qstr name; - struct nfs4_sequence_args seq_args; }; struct nfs_removeres { + struct nfs4_sequence_res seq_res; const struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; - struct nfs4_sequence_res seq_res; }; /* * Common arguments to the rename call */ struct nfs_renameargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *old_dir; const struct nfs_fh *new_dir; const struct qstr *old_name; const struct qstr *new_name; - struct nfs4_sequence_args seq_args; }; struct nfs_renameres { + struct nfs4_sequence_res seq_res; const struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; struct nfs_fattr *new_fattr; - struct nfs4_sequence_res seq_res; }; /* @@ -622,20 +622,20 @@ struct nfs_createargs { }; struct nfs_setattrargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; nfs4_stateid stateid; struct iattr * iap; const struct nfs_server * server; /* Needed for name mapping */ const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_setaclargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct nfs4_sequence_args seq_args; }; struct nfs_setaclres { @@ -643,27 +643,27 @@ struct nfs_setaclres { }; struct nfs_getaclargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct nfs4_sequence_args seq_args; }; /* getxattr ACL interface flags */ #define NFS4_ACL_TRUNC 0x0001 /* ACL was truncated */ struct nfs_getaclres { + struct nfs4_sequence_res seq_res; size_t acl_len; size_t acl_data_offset; int acl_flags; struct page * acl_scratch; - struct nfs4_sequence_res seq_res; }; struct nfs_setattrres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; const struct nfs_server * server; - struct nfs4_sequence_res seq_res; }; struct nfs_linkargs { @@ -828,21 +828,22 @@ struct nfs3_getaclres { typedef u64 clientid4; struct nfs4_accessargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; u32 access; - struct nfs4_sequence_args seq_args; }; struct nfs4_accessres { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; u32 supported; u32 access; - struct nfs4_sequence_res seq_res; }; struct nfs4_create_arg { + struct nfs4_sequence_args seq_args; u32 ftype; union { struct { @@ -859,88 +860,88 @@ struct nfs4_create_arg { const struct iattr * attrs; const struct nfs_fh * dir_fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_create_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fh * fh; struct nfs_fattr * fattr; struct nfs4_change_info dir_cinfo; - struct nfs4_sequence_res seq_res; }; struct nfs4_fsinfo_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_fsinfo_res { - struct nfs_fsinfo *fsinfo; struct nfs4_sequence_res seq_res; + struct nfs_fsinfo *fsinfo; }; struct nfs4_getattr_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_getattr_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_sequence_res seq_res; }; struct nfs4_link_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const struct nfs_fh * dir_fh; const struct qstr * name; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_link_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs4_change_info cinfo; struct nfs_fattr * dir_attr; - struct nfs4_sequence_res seq_res; }; struct nfs4_lookup_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * dir_fh; const struct qstr * name; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_lookup_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs_fh * fh; - struct nfs4_sequence_res seq_res; }; struct nfs4_lookup_root_arg { - const u32 * bitmask; struct nfs4_sequence_args seq_args; + const u32 * bitmask; }; struct nfs4_pathconf_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_pathconf_res { - struct nfs_pathconf *pathconf; struct nfs4_sequence_res seq_res; + struct nfs_pathconf *pathconf; }; struct nfs4_readdir_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; u64 cookie; nfs4_verifier verifier; @@ -949,21 +950,20 @@ struct nfs4_readdir_arg { unsigned int pgbase; /* zero-copy data */ const u32 * bitmask; int plus; - struct nfs4_sequence_args seq_args; }; struct nfs4_readdir_res { + struct nfs4_sequence_res seq_res; nfs4_verifier verifier; unsigned int pgbase; - struct nfs4_sequence_res seq_res; }; struct nfs4_readlink { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; unsigned int pgbase; unsigned int pglen; /* zero-copy data */ struct page ** pages; /* zero-copy data */ - struct nfs4_sequence_args seq_args; }; struct nfs4_readlink_res { @@ -989,28 +989,28 @@ struct nfs4_setclientid_res { }; struct nfs4_statfs_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_statfs_res { - struct nfs_fsstat *fsstat; struct nfs4_sequence_res seq_res; + struct nfs_fsstat *fsstat; }; struct nfs4_server_caps_arg { - struct nfs_fh *fhandle; struct nfs4_sequence_args seq_args; + struct nfs_fh *fhandle; }; struct nfs4_server_caps_res { + struct nfs4_sequence_res seq_res; u32 attr_bitmask[3]; u32 acl_bitmask; u32 has_links; u32 has_symlinks; u32 fh_expire_type; - struct nfs4_sequence_res seq_res; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 @@ -1036,16 +1036,16 @@ struct nfs4_fs_locations { }; struct nfs4_fs_locations_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh *dir_fh; const struct qstr *name; struct page *page; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_fs_locations_res { - struct nfs4_fs_locations *fs_locations; struct nfs4_sequence_res seq_res; + struct nfs4_fs_locations *fs_locations; }; struct nfs4_secinfo_oid { @@ -1070,14 +1070,14 @@ struct nfs4_secinfo_flavors { }; struct nfs4_secinfo_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh *dir_fh; const struct qstr *name; - struct nfs4_sequence_args seq_args; }; struct nfs4_secinfo_res { - struct nfs4_secinfo_flavors *flavors; struct nfs4_sequence_res seq_res; + struct nfs4_secinfo_flavors *flavors; }; #endif /* CONFIG_NFS_V4 */ @@ -1157,9 +1157,9 @@ struct nfs41_create_session_res { }; struct nfs41_reclaim_complete_args { + struct nfs4_sequence_args seq_args; /* In the future extend to include curr_fh for use with migration */ unsigned char one_fs:1; - struct nfs4_sequence_args seq_args; }; struct nfs41_reclaim_complete_res { @@ -1169,28 +1169,28 @@ struct nfs41_reclaim_complete_res { #define SECINFO_STYLE_CURRENT_FH 0 #define SECINFO_STYLE_PARENT 1 struct nfs41_secinfo_no_name_args { - int style; struct nfs4_sequence_args seq_args; + int style; }; struct nfs41_test_stateid_args { - nfs4_stateid *stateid; struct nfs4_sequence_args seq_args; + nfs4_stateid *stateid; }; struct nfs41_test_stateid_res { - unsigned int status; struct nfs4_sequence_res seq_res; + unsigned int status; }; struct nfs41_free_stateid_args { - nfs4_stateid *stateid; struct nfs4_sequence_args seq_args; + nfs4_stateid *stateid; }; struct nfs41_free_stateid_res { - unsigned int status; struct nfs4_sequence_res seq_res; + unsigned int status; }; #else -- cgit v1.2.3 From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Nov 2012 23:59:29 -0500 Subject: SUNRPC: Don't allow low priority tasks to pre-empt higher priority ones Currently, the priority queues attempt to be 'fair' to lower priority tasks by scheduling them after a certain number of higher priority tasks have run. The problem is that both the transport send queue and the NFSv4.1 session slot queue have strong ordering requirements. This patch therefore removes the fairness code in favour of strong ordering of task priorities. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index dc0c3cc3ada3..b64f8eb0b973 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -192,7 +192,6 @@ struct rpc_wait_queue { pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ - unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -- cgit v1.2.3 From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 3 Dec 2012 16:25:40 +0000 Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16() intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC, 48 for other platforms). By using these instead of the inline assembler that most architectures have in their __arch_swabXX() macros, we let the compiler see what's actually happening. The resulting code should be at least as good, and much *better* in the cases where it can be combined with a nearby load or store, using a load-and-byteswap or store-and-byteswap instruction (e.g. lwbrx/stwbrx on PowerPC, movbe on Atom). When GCC is sufficiently recent *and* the architecture opts in to using the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be used in preference to the __arch_swabXX() macros. An architecture which does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own hand-crafted macros. Signed-off-by: David Woodhouse Acked-by: H. Peter Anvin --- include/linux/compiler-gcc4.h | 10 ++++++++++ include/linux/compiler-intel.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..dc16a858e77c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -63,3 +63,13 @@ #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if __GNUC_MINOR__ >= 4 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d8e636e5607d..973ce10c40b6 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -29,3 +29,10 @@ #endif #define uninitialized_var(x) x + +#ifndef __HAVE_BUILTIN_BSWAP16__ +/* icc has this, but it's called _bswap16 */ +#define __HAVE_BUILTIN_BSWAP16__ +#define __builtin_bswap16 _bswap16 +#endif + -- cgit v1.2.3 From dd31866b0d55c9b70722ebad6ccd643223d9269e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Nov 2012 17:06:26 +0900 Subject: f2fs: add on-disk layout This adds a header file describing the on-disk layout of f2fs. Signed-off-by: Changman Lee Signed-off-by: Chul Lee Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 410 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 include/linux/f2fs_fs.h (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 000000000000..1429ece7caab --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,410 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include +#include + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR 0x0U +#define NEW_ADDR -1U + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. + */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_MAX_NAME_LEN 256 +#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_reserved; /* reserved */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} __packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. + * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (sizeof(struct f2fs_summary)) +#define SUM_FOOTER_SIZE (sizeof(struct summary_footer)) +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_NAME_LEN 8 + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_NAME_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ -- cgit v1.2.3 From 25ca923b2a766b9c93b63777ead351137533a623 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Nov 2012 16:12:41 +0900 Subject: f2fs: fix endian conversion bugs reported by sparse This patch should resolve the bugs reported by the sparse tool. Initial reports were written by "kbuild test robot" managed by fengguang.wu. In my local machines, I've tested also by running: > make C=2 CF="-D__CHECK_ENDIAN__" Accordingly, I've found lots of warnings and bugs related to the endian conversion. And I've fixed all at this moment. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1429ece7caab..c2fbbc35c1e6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -272,8 +272,8 @@ struct f2fs_sit_block { * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ #define ENTRIES_IN_SUM 512 -#define SUMMARY_SIZE (sizeof(struct f2fs_summary)) -#define SUM_FOOTER_SIZE (sizeof(struct summary_footer)) +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) /* a summary entry for a 4KB-sized block in a segment */ @@ -297,7 +297,7 @@ struct summary_footer { __u32 check_sum; /* summary checksum */ } __packed; -#define SUM_JOURNAL_SIZE (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\ +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ SUM_ENTRY_SIZE) #define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ sizeof(struct nat_journal_entry)) -- cgit v1.2.3 From 457d08ee4fd91c8df17917ff2d32565e6adacbfc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Dec 2012 14:54:50 +0900 Subject: f2fs: introduce accessor to retrieve number of dentry slots Simplify code by providing the accessor macro to retrieve the number of dentry slots for a given filename length. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat --- include/linux/f2fs_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c2fbbc35c1e6..f9a12f6243a5 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -363,6 +363,9 @@ typedef __le32 f2fs_hash_t; /* One directory entry slot covers 8bytes-long file name */ #define F2FS_NAME_LEN 8 +#define F2FS_NAME_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) /* the number of dentry in a block */ #define NR_DENTRY_IN_BLOCK 214 -- cgit v1.2.3 From 3c58346525d82625e68e24f071804c2dc057b6f4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 28 Nov 2012 16:23:01 +0000 Subject: slab: Simplify bootstrap The nodelists field in kmem_cache is pointing to the first unused object in the array field when bootstrap is complete. A problem with the current approach is that the statically sized kmem_cache structure use on boot can only contain NR_CPUS entries. If the number of nodes plus the number of cpus is greater then we would overwrite memory following the kmem_cache_boot definition. Increase the size of the array field to ensure that also the node pointers fit into the array field. Once we do that we no longer need the kmem_cache_nodelists array and we can then also use that structure elsewhere. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cc290f0bdb34..45c0356fdc8c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -89,9 +89,13 @@ struct kmem_cache { * (see kmem_cache_init()) * We still use [NR_CPUS] and not [1] or [0] because cache_cache * is statically defined, so we reserve the max number of cpus. + * + * We also need to guarantee that the list is able to accomodate a + * pointer for each node since "nodelists" uses the remainder of + * available pointers. */ struct kmem_list3 **nodelists; - struct array_cache *array[NR_CPUS]; + struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] */ -- cgit v1.2.3 From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:45 +0200 Subject: inotify, fanotify: replace fsnotify_put_group() with fsnotify_destroy_group() Currently in fsnotify_put_group() the ref count of a group is decremented and if it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only at group creation set to 1 and never increased after that a call to fsnotify_put_group() always results in a call to fsnotify_destroy_group(). With this patch fsnotify_destroy_group() is called directly. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63d966d5c2ea..d2ad345bdeec 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); - +/* destroy group */ +extern void fsnotify_destroy_group(struct fsnotify_group *group); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3 From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:46 +0200 Subject: fsnotify: introduce fsnotify_get_group() Introduce fsnotify_get_group() which increments the reference counter of a group. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d2ad345bdeec..e76cef75295d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode /* called from fsnotify listeners, such as fanotify or dnotify */ -/* get a reference to an existing or create a new group */ +/* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); +/* get reference to a group */ +extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ -- cgit v1.2.3 From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:50 +0200 Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark list Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead of a spinlock results in more flexibility (i.e it allows to sleep while the lock is held). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e76cef75295d..c5848346840d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -141,7 +141,7 @@ struct fsnotify_group { unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ - spinlock_t mark_lock; /* protect marks_list */ + struct mutex mark_mutex; /* protect marks_list */ atomic_t num_marks; /* 1 for each mark and 1 for not being * past the point of no return when freeing * a group */ -- cgit v1.2.3 From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:51 +0200 Subject: fsnotify: pass group to fsnotify_destroy_mark() In fsnotify_destroy_mark() dont get the group from the passed mark anymore, but pass the group itself as an additional parameter to the function. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c5848346840d..140b4b8a100b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -/* given a mark, flag it to be freed when all references are dropped */ -extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); +/* given a group and a mark, flag mark to be freed when all references are dropped */ +extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ -- cgit v1.2.3 From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:52 +0200 Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and fsnotify_remove_mark() This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked() which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but assume that the caller has already taken the groups mark mutex. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 140b4b8a100b..26c06afa264e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); +extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ -- cgit v1.2.3 From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:53 +0200 Subject: fsnotify: dont put marks on temporary list when clearing marks by group In clear_marks_by_group_flags() the mark list of a group is iterated and the marks are put on a temporary list. Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list any more and are able to remove the marks while the mark list is iterated and the mark list mutex is held. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 26c06afa264e..5a8899350456 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -287,7 +287,6 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - struct list_head free_g_list; /* tmp list used when freeing this mark */ __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 -- cgit v1.2.3 From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 12 Aug 2011 01:13:31 +0200 Subject: fsnotify: change locking order On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote: > > I finally built and tested a v3.0 kernel with these patches (I know I'm > SOOOOOO far behind). Not what I hoped for: > > > [ 150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds. Have a nice day... > > [ 150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 > > [ 150.946012] IP: [] shmem_free_inode+0x18/0x50 > > [ 150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0 > > [ 150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > [ 150.946012] CPU 0 > > [ 150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan] > > [ 150.946012] > > [ 150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM > > [ 150.946012] RIP: 0010:[] [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP: 0018:ffff88002c2e5df8 EFLAGS: 00010282 > > [ 150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438 > > [ 150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240 > > [ 150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000 > > [ 150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428 > > [ 150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610 > > [ 150.946012] FS: 00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000 > > [ 150.946012] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > [ 150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0 > > [ 150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > [ 150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > [ 150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760) > > [ 150.946012] Stack: > > [ 150.946012] ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76 > > [ 150.946012] ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250 > > [ 150.946012] ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438 > > [ 150.946012] Call Trace: > > [ 150.946012] [] shmem_evict_inode+0x76/0x130 > > [ 150.946012] [] evict+0x7e/0x170 > > [ 150.946012] [] iput_final+0xd0/0x190 > > [ 150.946012] [] iput+0x33/0x40 > > [ 150.946012] [] fsnotify_destroy_mark_locked+0x145/0x160 > > [ 150.946012] [] fsnotify_destroy_mark+0x36/0x50 > > [ 150.946012] [] sys_inotify_rm_watch+0x77/0xd0 > > [ 150.946012] [] system_call_fastpath+0x16/0x1b > > [ 150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00 > > [ 150.946012] 83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a > > [ 150.946012] RIP [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP > > [ 150.946012] CR2: 0000000000000070 > > Looks at aweful lot like the problem from: > http://www.spinics.net/lists/linux-fsdevel/msg46101.html > I tried to reproduce this bug with your test program, but without success. However, if I understand correctly, this occurs since we dont hold any locks when we call iput() in mark_destroy(), right? With the patches you tested, iput() is also not called within any lock, since the groups mark_mutex is released temporarily before iput() is called. This is, since the original codes behaviour is similar. However since we now have a mutex as the biggest lock, we can do what you suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and call iput() with the mutex held to avoid the race. The patch below implements this. It uses nested locking to avoid deadlock in case we do the final iput() on an inode which still holds marks and thus would take the mutex again when calling fsnotify_inode_delete() in destroy_inode(). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5a8899350456..1af2f6a722c0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -88,9 +88,10 @@ struct fsnotify_event_private_data; * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union - * freeing-mark - this means that a mark has been flagged to die when everything - * finishes using it. The function is supplied with what must be a - * valid group and inode to use to clean up. + * freeing_mark - called when a mark is being destroyed for some reason. The group + * MUST be holding a reference on each mark and that reference must be + * dropped in this function. inotify uses this function to send + * userspace messages that marks have been removed. */ struct fsnotify_ops { bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, -- cgit v1.2.3 From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 14 Oct 2011 17:43:39 -0400 Subject: fsnotify: make fasync generic for both inotify and fanotify inotify is supposed to support async signal notification when information is available on the inotify fd. This patch moves that support to generic fsnotify functions so it can be used by all notification mechanisms. Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1af2f6a722c0..d5b0910d4961 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,8 @@ struct fsnotify_group { * a group */ struct list_head marks_list; /* all inode marks for this group */ + struct fasync_struct *fsn_fa; /* async notification */ + /* groups can define private fields here or use the void *private */ union { void *private; @@ -156,7 +158,6 @@ struct fsnotify_group { spinlock_t idr_lock; struct idr idr; u32 last_wd; - struct fasync_struct *fa; /* async notification */ struct user_struct *user; } inotify_data; #endif @@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group); extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); +/* fasync handler function */ +extern int fsnotify_fasync(int fd, struct file *file, int on); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3 From 7056741fd9fc14a65608549a4657cf5178f05f63 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Thu, 8 Nov 2012 13:47:44 -0800 Subject: lib/raid6: Add AVX2 optimized recovery functions Optimize RAID6 recovery functions to take advantage of the 256-bit YMM integer instructions introduced in AVX2. The patch was tested and benchmarked before submission. However hardware is not yet released so benchmark numbers cannot be reported. Acked-by: "H. Peter Anvin" Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 640c69ceec96..3156347452b9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -109,6 +109,7 @@ struct raid6_recov_calls { extern const struct raid6_recov_calls raid6_recov_intx1; extern const struct raid6_recov_calls raid6_recov_ssse3; +extern const struct raid6_recov_calls raid6_recov_avx2; /* Algorithm list */ extern const struct raid6_calls * const raid6_algos[]; -- cgit v1.2.3 From 2c935842bdb46f5f557426feb4d2bdfdad1aa5f9 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 30 Nov 2012 13:10:39 -0800 Subject: lib/raid6: Add AVX2 optimized gen_syndrome functions Add AVX2 optimized gen_syndrom functions, which is simply based on sse2.c written by hpa. Signed-off-by: Yuanhan Liu Reviewed-by: H. Peter Anvin Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 3156347452b9..8dfaa2ce2e95 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1; extern const struct raid6_calls raid6_altivec2; extern const struct raid6_calls raid6_altivec4; extern const struct raid6_calls raid6_altivec8; +extern const struct raid6_calls raid6_avx2x1; +extern const struct raid6_calls raid6_avx2x2; +extern const struct raid6_calls raid6_avx2x4; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); -- cgit v1.2.3 From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Nov 2012 12:28:24 -0800 Subject: libceph: remove 'osdtimeout' option This would reset a connection with any OSD that had an outstanding request that was taking more than N seconds. The idea was that if the OSD was buggy, the client could compensate by resending the request. In reality, this only served to hide server bugs, and we haven't actually seen such a bug in quite a while. Moreover, the userspace client code never did this. More importantly, often the request is taking a long time because the OSD is trying to recover, or overloaded, and killing the connection and retrying would only make the situation worse by giving the OSD more work to do. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 42624789b06f..317aff8feb0a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,7 +43,6 @@ struct ceph_options { struct ceph_entity_addr my_addr; int mount_timeout; int osd_idle_ttl; - int osd_timeout; int osd_keepalive_timeout; /* @@ -63,7 +62,6 @@ struct ceph_options { * defaults */ #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 -- cgit v1.2.3 From d2cc4dde9206aa2c7fb237aa689d3277cc070547 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 29 Nov 2012 08:37:03 -0600 Subject: bdi_register: add __printf verification, fix arg mismatch __printf is useful to verify format and arguments. Signed-off-by: Joe Perches Reviewed-by: Alex Elder --- include/linux/backing-dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2a9a9abc9126..12731a19ef06 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -114,6 +114,7 @@ struct backing_dev_info { int bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); +__printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); -- cgit v1.2.3 From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:31:07 +1030 Subject: module: add syscall to load module from fd As part of the effort to create a stronger boundary between root and kernel, Chrome OS wants to be able to enforce that kernel modules are being loaded only from our read-only crypto-hash verified (dm_verity) root filesystem. Since the init_module syscall hands the kernel a module as a memory blob, no reasoning about the origin of the blob can be made. Earlier proposals for appending signatures to kernel modules would not be useful in Chrome OS, since it would involve adding an additional set of keys to our kernel and builds for no good reason: we already trust the contents of our root filesystem. We don't need to verify those kernel modules a second time. Having to do signature checking on module loading would slow us down and be redundant. All we need to know is where a module is coming from so we can say yes/no to loading it. If a file descriptor is used as the source of a kernel module, many more things can be reasoned about. In Chrome OS's case, we could enforce that the module lives on the filesystem we expect it to live on. In the case of IMA (or other LSMs), it would be possible, for example, to examine extended attributes that may contain signatures over the contents of the module. This introduces a new syscall (on x86), similar to init_module, that has only two arguments. The first argument is used as a file descriptor to the module and the second argument is a pointer to the NULL terminated string of module arguments. Signed-off-by: Kees Cook Cc: Andrew Morton Signed-off-by: Rusty Russell (merge fixes) --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 727f0cd73921..32bc035bcd68 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); +asmlinkage long sys_finit_module(int fd, const char __user *uargs); #endif -- cgit v1.2.3 From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Oct 2012 18:09:41 +1030 Subject: module: add flags arg to sys_finit_module() Thanks to Michael Kerrisk for keeping us honest. These flags are actually useful for eliminating the only case where kmod has to mangle a module's internals: for overriding module versioning. Signed-off-by: Rusty Russell Acked-by: Lucas De Marchi Acked-by: Kees Cook --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 32bc035bcd68..8cf7b508cb50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); -asmlinkage long sys_finit_module(int fd, const char __user *uargs); +asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); #endif -- cgit v1.2.3 From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:32:07 +1030 Subject: security: introduce kernel_module_from_file hook Now that kernel module origins can be reasoned about, provide a hook to the LSMs to make policy decisions about the module file. This will let Chrome OS enforce that loadable kernel modules can only come from its read-only hash-verified root filesystem. Other LSMs can, for example, read extended attributes for signatures, etc. Signed-off-by: Kees Cook Acked-by: Serge E. Hallyn Acked-by: Eric Paris Acked-by: Mimi Zohar Acked-by: James Morris Signed-off-by: Rusty Russell --- include/linux/security.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 05e88bdcf7d9..0f6afc657f77 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * userspace to load a kernel module with the given name. * @kmod_name name of the module requested by the kernel * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter @@ -1508,6 +1514,7 @@ struct security_operations { int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); @@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); +int security_kernel_module_from_file(struct file *file); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); @@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } +static inline int security_kernel_module_from_file(struct file *file) +{ + return 0; +} + static inline int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) -- cgit v1.2.3 From fdf90729e57812cb12d7938e2dee7c71e875fb08 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 16 Oct 2012 12:40:08 +1030 Subject: ima: support new kernel module syscall With the addition of the new kernel module syscall, which defines two arguments - a file descriptor to the kernel module and a pointer to a NULL terminated string of module arguments - it is now possible to measure and appraise kernel modules like any other file on the file system. This patch adds support to measure and appraise kernel modules in an extensible and consistent manner. To support filesystems without extended attribute support, additional patches could pass the signature as the first parameter. Signed-off-by: Mimi Zohar Signed-off-by: Rusty Russell --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 2c7223d7e73b..86c361e947b9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,6 +18,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); +extern int ima_module_check(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -40,6 +41,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } +static inline int ima_module_check(struct file *file) +{ + return 0; +} + #endif /* CONFIG_IMA_H */ #ifdef CONFIG_IMA_APPRAISE -- cgit v1.2.3 From 6f33d58794ef4cef4b2c706029810f9688bd3026 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Nov 2012 12:30:25 +1030 Subject: __UNIQUE_ID() Jan Beulich points out __COUNTER__ (gcc 4.3 and above), so let's use that to create unique ids. This is better than __LINE__ which we use today, so provide a wrapper. Stanislaw Gruszka reported that some module parameters start with a digit, so we need to prepend when we for the unique id. Signed-off-by: Rusty Russell Acked-by: Jan Beulich --- include/linux/compiler-gcc4.h | 2 ++ include/linux/compiler.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..56c802cba7f6 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -31,6 +31,8 @@ #define __linktime_error(message) __attribute__((__error__(message))) +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + #if __GNUC_MINOR__ >= 5 /* * Mark a position in code as unreachable. This can be used to diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f430e4162f41..5f45335e1ac7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -42,6 +42,10 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __rcu #endif +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) + #ifdef __KERNEL__ #ifdef __GNUC__ @@ -164,6 +168,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); (typeof(ptr)) (__ptr + (off)); }) #endif +/* Not-quite-unique ID. */ +#ifndef __UNIQUE_ID +# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) +#endif + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 34182eea36fc1d70d748b0947c873314980ba806 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Nov 2012 12:30:25 +1030 Subject: moduleparam: use __UNIQUE_ID() Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index d6a58065c09c..137b4198fc03 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -16,17 +16,15 @@ /* Chosen so that structs with an unsigned long line up. */ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define ___module_cat(a,b) __mod_ ## a ## b -#define __module_cat(a,b) ___module_cat(a,b) #ifdef MODULE #define __MODULE_INFO(tag, name, info) \ -static const char __module_cat(name,__LINE__)[] \ +static const char __UNIQUE_ID(name)[] \ __used __attribute__((section(".modinfo"), unused, aligned(1))) \ = __stringify(tag) "=" info #else /* !MODULE */ /* This struct is here for syntactic coherency, it is not used */ #define __MODULE_INFO(tag, name, info) \ - struct __module_cat(name,__LINE__) {} + struct __UNIQUE_ID(name) {} #endif #define __MODULE_PARM_TYPE(name, _type) \ __MODULE_INFO(parmtype, name##type, #name ":" _type) -- cgit v1.2.3 From facc0a6bd494ce21e31b34fc355ecf702518272b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 11:55:28 +1030 Subject: ASN.1: Define indefinite length marker constant Define a constant to hold the marker value seen in an indefinite-length element. Signed-off-by: David Howells Signed-off-by: Rusty Russell --- include/linux/asn1.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/asn1.h b/include/linux/asn1.h index 5c3f4e4b9a23..eed6982860ba 100644 --- a/include/linux/asn1.h +++ b/include/linux/asn1.h @@ -64,4 +64,6 @@ enum asn1_tag { ASN1_LONG_TAG = 31 /* Long form tag */ }; +#define ASN1_INDEFINITE_LENGTH 0x80 + #endif /* _LINUX_ASN1_H */ -- cgit v1.2.3 From 63b68901dfd590cc13d4fe5c08dec2ca75b3c4aa Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 14 Dec 2012 09:09:11 -0800 Subject: mfd: omap-usb-host: get rid of cpu_is_omap..() macros Instead of using cpu_is_omap..() macros in the device driver we rely on information provided in the platform data. The only information we need is whether the USB Host module has a single ULPI bypass control bit for all ports or individual bypass control bits for each port. OMAP3 REV2.1 and earlier have the former. Signed-off-by: Roger Quadros Acked-by: Samuel Ortiz [tony@atomide.com: updated to remove plat/cpu.h] Signed-off-by: Tony Lindgren --- include/linux/platform_data/usb-omap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index 8570bcfe6311..ef65b67c56c3 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -59,6 +59,9 @@ struct usbhs_omap_platform_data { struct ehci_hcd_omap_platform_data *ehci_data; struct ohci_hcd_omap_platform_data *ohci_data; + + /* OMAP3 <= ES2.1 have a single ulpi bypass control bit */ + unsigned single_ulpi_bypass:1; }; /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From d9ba573718666df2e7e30d671f81bba39d07f91c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 14 Dec 2012 09:09:11 -0800 Subject: ARM: OMAP: Move plat/omap-serial.h to include/linux/platform_data/serial-omap.h We need to move this file to allow ARM multiplatform configurations to build for omap2+. This can now be done as this file now only contains platform_data. cc: Russell King cc: Alan Cox cc: Greg Kroah-Hartman cc: Govindraj.R cc: Kevin Hilman cc: linux-serial@vger.kernel.org Reviewed-by: Felipe Balbi Signed-off-by: Tony Lindgren --- include/linux/platform_data/serial-omap.h | 51 +++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/linux/platform_data/serial-omap.h (limited to 'include/linux') diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h new file mode 100644 index 000000000000..ff9b0aab5281 --- /dev/null +++ b/include/linux/platform_data/serial-omap.h @@ -0,0 +1,51 @@ +/* + * Driver for OMAP-UART controller. + * Based on drivers/serial/8250.c + * + * Copyright (C) 2010 Texas Instruments. + * + * Authors: + * Govindraj R + * Thara Gopinath + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __OMAP_SERIAL_H__ +#define __OMAP_SERIAL_H__ + +#include +#include +#include + +#define DRIVER_NAME "omap_uart" + +/* + * Use tty device name as ttyO, [O -> OMAP] + * in bootargs we specify as console=ttyO0 if uart1 + * is used as console uart. + */ +#define OMAP_SERIAL_NAME "ttyO" + +struct omap_uart_port_info { + bool dma_enabled; /* To specify DMA Mode */ + unsigned int uartclk; /* UART clock rate */ + upf_t flags; /* UPF_* flags */ + unsigned int dma_rx_buf_size; + unsigned int dma_rx_timeout; + unsigned int autosuspend_timeout; + unsigned int dma_rx_poll_rate; + int DTR_gpio; + int DTR_inverted; + int DTR_present; + + int (*get_context_loss_count)(struct device *); + void (*set_forceidle)(struct device *); + void (*set_noidle)(struct device *); + void (*enable_wakeup)(struct device *, bool); +}; + +#endif /* __OMAP_SERIAL_H__ */ -- cgit v1.2.3 From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 15:21:52 -0500 Subject: NFSv4.1: Move the RPC timestamp out of the slot. Shave a few bytes off the slot table size by moving the RPC timestamp into the sequence results. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a55abd499c21..29adb12c7ecf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -194,6 +194,7 @@ struct nfs4_sequence_args { struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ + unsigned long sr_timestamp; int sr_status; /* sequence operation status */ u32 sr_status_flags; u32 sr_highest_slotid; -- cgit v1.2.3 From afc59400d6c65bad66d4ad0b2daf879cbff8e23e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Dec 2012 18:01:37 -0500 Subject: nfsd4: cleanup: replace rq_resused count by rq_next_page pointer It may be a matter of personal taste, but I find this makes the code clearer. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d83db800fe02..676ddf53b3ee 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -243,6 +243,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ int rq_resused; /* number of pages used for result */ + struct page * *rq_next_page; /* next reply page to use */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ @@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) static inline void svc_free_res_pages(struct svc_rqst *rqstp) { - while (rqstp->rq_resused) { - struct page **pp = (rqstp->rq_respages + - --rqstp->rq_resused); + while (rqstp->rq_next_page != rqstp->rq_respages) { + struct page **pp = --rqstp->rq_next_page; if (*pp) { put_page(*pp); *pp = NULL; -- cgit v1.2.3 From 06ca287dbac9cc19d04ac2901b8c4882c03795ff Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 16 Oct 2012 23:56:14 +1030 Subject: virtio: move queue_index and num_free fields into core struct virtqueue. They're generic concepts, so hoist them. This also avoids accessor functions (though kept around for merge with DaveM's net tree). This goes even further than Jason Wang's 17bb6d4088 patch ("virtio-ring: move queue_index to vring_virtqueue") which moved the queue_index from the specific transport. Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 533b1157f22e..4b8f17d90458 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -16,12 +16,20 @@ * @name: the name of this virtqueue (mainly for debugging) * @vdev: the virtio device this queue was created for. * @priv: a pointer for the virtqueue implementation to use. + * @index: the zero-based ordinal number for this queue. + * @num_free: number of elements we expect to be able to fit. + * + * A note on @num_free: with indirect buffers, each buffer needs one + * element in the queue, otherwise a buffer will need one element per + * sg element. */ struct virtqueue { struct list_head list; void (*callback)(struct virtqueue *vq); const char *name; struct virtio_device *vdev; + unsigned int index; + unsigned int num_free; void *priv; }; @@ -50,7 +58,11 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); unsigned int virtqueue_get_vring_size(struct virtqueue *vq); -int virtqueue_get_queue_index(struct virtqueue *vq); +/* FIXME: Obsolete accessor, but required for virtio_net merge. */ +static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) +{ + return vq->index; +} /** * virtio_device - representation of a device using virtio -- cgit v1.2.3 From 9bffdca8c64a72ac54c47a552734ab457bc720d4 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Tue, 11 Dec 2012 11:04:50 +1030 Subject: virtio: use dev_to_virtio wrapper in virtio Use dev_to_virtio wrapper in virtio to make code clearly. Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Wanlong Gao Signed-off-by: Rusty Russell --- include/linux/virtio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4b8f17d90458..5f1f80c76468 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -85,7 +85,11 @@ struct virtio_device { void *priv; }; -#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev) +static inline struct virtio_device *dev_to_virtio(struct device *_dev) +{ + return container_of(_dev, struct virtio_device, dev); +} + int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); -- cgit v1.2.3 From 9a2bdcc85d28506d4e5d4a9618fb133a3f40945d Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Mon, 10 Dec 2012 16:38:33 +0800 Subject: virtio: add drv_to_virtio to make code clearly Add drv_to_virtio wrapper to get virtio_driver from device_driver. Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Wanlong Gao Signed-off-by: Rusty Russell --- include/linux/virtio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 5f1f80c76468..cf8adb1f5b2c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -119,6 +119,11 @@ struct virtio_driver { #endif }; +static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) +{ + return container_of(drv, struct virtio_driver, driver); +} + int register_virtio_driver(struct virtio_driver *drv); void unregister_virtio_driver(struct virtio_driver *drv); #endif /* _LINUX_VIRTIO_H */ -- cgit v1.2.3 From 7a64bf05b2a6fe3703062d13d389e3eb904741c6 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:21:51 -0800 Subject: mm: add a __GFP_KMEMCG flag This flag is used to indicate to the callees that this allocation is a kernel allocation in process context, and should be accounted to current's memcg. Signed-off-by: Glauber Costa Acked-by: Johannes Weiner Acked-by: Rik van Riel Acked-by: Mel Gorman Acked-by: Kamezawa Hiroyuki Acked-by: Michal Hocko Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Suleiman Souhlal Cc: Tejun Heo Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: JoonSoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f74856e17e48..643c9a6f7f34 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,6 +30,7 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u +#define ___GFP_KMEMCG 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -89,6 +90,7 @@ struct vm_area_struct; #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ +#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ /* -- cgit v1.2.3 From 7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:21:56 -0800 Subject: memcg: kmem controller infrastructure Introduce infrastructure for tracking kernel memory pages to a given memcg. This will happen whenever the caller includes the flag __GFP_KMEMCG flag, and the task belong to a memcg other than the root. In memcontrol.h those functions are wrapped in inline acessors. The idea is to later on, patch those with static branches, so we don't incur any overhead when no mem cgroups with limited kmem are being used. Users of this functionality shall interact with the memcg core code through the following functions: memcg_kmem_newpage_charge: will return true if the group can handle the allocation. At this point, struct page is not yet allocated. memcg_kmem_commit_charge: will either revert the charge, if struct page allocation failed, or embed memcg information into page_cgroup. memcg_kmem_uncharge_page: called at free time, will revert the charge. Signed-off-by: Glauber Costa Acked-by: Michal Hocko Acked-by: Kamezawa Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Johannes Weiner Cc: Tejun Heo Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: JoonSoo Kim Cc: Mel Gorman Cc: Rik van Riel Cc: Suleiman Souhlal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 110 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e98a74c0c9c0..afa2ad40457e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #define _LINUX_MEMCONTROL_H #include #include +#include struct mem_cgroup; struct page_cgroup; @@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk) { } #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ + +#ifdef CONFIG_MEMCG_KMEM +static inline bool memcg_kmem_enabled(void) +{ + return true; +} + +/* + * In general, we'll do everything in our power to not incur in any overhead + * for non-memcg users for the kmem functions. Not even a function call, if we + * can avoid it. + * + * Therefore, we'll inline all those functions so that in the best case, we'll + * see that kmemcg is off for everybody and proceed quickly. If it is on, + * we'll still do most of the flag checking inline. We check a lot of + * conditions, but because they are pretty simple, they are expected to be + * fast. + */ +bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, + int order); +void __memcg_kmem_commit_charge(struct page *page, + struct mem_cgroup *memcg, int order); +void __memcg_kmem_uncharge_pages(struct page *page, int order); + +/** + * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. + * @gfp: the gfp allocation flags. + * @memcg: a pointer to the memcg this was charged against. + * @order: allocation order. + * + * returns true if the memcg where the current task belongs can hold this + * allocation. + * + * We return true automatically if this allocation is not to be accounted to + * any memcg. + */ +static inline bool +memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +{ + if (!memcg_kmem_enabled()) + return true; + + /* + * __GFP_NOFAIL allocations will move on even if charging is not + * possible. Therefore we don't even try, and have this allocation + * unaccounted. We could in theory charge it with + * res_counter_charge_nofail, but we hope those allocations are rare, + * and won't be worth the trouble. + */ + if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL)) + return true; + if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) + return true; + + /* If the test is dying, just let it go. */ + if (unlikely(fatal_signal_pending(current))) + return true; + + return __memcg_kmem_newpage_charge(gfp, memcg, order); +} + +/** + * memcg_kmem_uncharge_pages: uncharge pages from memcg + * @page: pointer to struct page being freed + * @order: allocation order. + * + * there is no need to specify memcg here, since it is embedded in page_cgroup + */ +static inline void +memcg_kmem_uncharge_pages(struct page *page, int order) +{ + if (memcg_kmem_enabled()) + __memcg_kmem_uncharge_pages(page, order); +} + +/** + * memcg_kmem_commit_charge: embeds correct memcg in a page + * @page: pointer to struct page recently allocated + * @memcg: the memcg structure we charged against + * @order: allocation order. + * + * Needs to be called after memcg_kmem_newpage_charge, regardless of success or + * failure of the allocation. if @page is NULL, this function will revert the + * charges. Otherwise, it will commit the memcg given by @memcg to the + * corresponding page_cgroup. + */ +static inline void +memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) +{ + if (memcg_kmem_enabled() && memcg) + __memcg_kmem_commit_charge(page, memcg, order); +} + +#else +static inline bool +memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +{ + return true; +} + +static inline void memcg_kmem_uncharge_pages(struct page *page, int order) +{ +} + +static inline void +memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 6a1a0d3b625a4091e7a0eb249aefc6a644385149 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:00 -0800 Subject: mm: allocate kernel pages to the right memcg When a process tries to allocate a page with the __GFP_KMEMCG flag, the page allocator will call the corresponding memcg functions to validate the allocation. Tasks in the root memcg can always proceed. To avoid adding markers to the page - and a kmem flag that would necessarily follow, as much as doing page_cgroup lookups for no reason, whoever is marking its allocations with __GFP_KMEMCG flag is responsible for telling the page allocator that this is such an allocation at free_pages() time. This is done by the invocation of __free_accounted_pages() and free_accounted_pages(). Signed-off-by: Glauber Costa Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Kamezawa Hiroyuki Acked-by: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Johannes Weiner Cc: Suleiman Souhlal Cc: Tejun Heo Cc: Frederic Weisbecker Cc: Greg Thelen Cc: JoonSoo Kim Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 643c9a6f7f34..0f615eb23d05 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -367,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, int cold); extern void free_hot_cold_page_list(struct list_head *list, int cold); +extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); +extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); + #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) -- cgit v1.2.3 From 50bdd430c20566b13d8bc59946184b08f5875de6 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:04 -0800 Subject: res_counter: return amount of charges after res_counter_uncharge() It is useful to know how many charges are still left after a call to res_counter_uncharge. While it is possible to issue a res_counter_read after uncharge, this can be racy. If we need, for instance, to take some action when the counters drop down to 0, only one of the callers should see it. This is the same semantics as the atomic variables in the kernel. Since the current return value is void, we don't need to worry about anything breaking due to this change: nobody relied on that, and only users appearing from now on will be checking this value. Signed-off-by: Glauber Costa Reviewed-by: Michal Hocko Acked-by: Kamezawa Hiroyuki Acked-by: David Rientjes Cc: Johannes Weiner Cc: Suleiman Souhlal Cc: Tejun Heo Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Greg Thelen Cc: JoonSoo Kim Cc: Mel Gorman Cc: Pekka Enberg Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 6f54e40fa218..5ae8456d9670 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter, * * these calls check for usage underflow and show a warning on the console * _locked call expects the counter->lock to be taken + * + * returns the total charges still present in @counter. */ -void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val); +u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); +u64 res_counter_uncharge(struct res_counter *counter, unsigned long val); -void res_counter_uncharge_until(struct res_counter *counter, - struct res_counter *top, - unsigned long val); +u64 res_counter_uncharge_until(struct res_counter *counter, + struct res_counter *top, + unsigned long val); /** * res_counter_margin - calculate chargeable space of a counter * @cnt: the counter -- cgit v1.2.3 From a8964b9b84f99c0b1b5d7c09520f89f0700e742e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:09 -0800 Subject: memcg: use static branches when code not in use We can use static branches to patch the code in or out when not used. Because the _ACTIVE bit on kmem_accounted is only set after the increment is done, we guarantee that the root memcg will always be selected for kmem charges until all call sites are patched (see memcg_kmem_enabled). This guarantees that no mischarges are applied. Static branch decrement happens when the last reference count from the kmem accounting in memcg dies. This will only happen when the charges drop down to 0. When that happens, we need to disable the static branch only on those memcgs that enabled it. To achieve this, we would be forced to complicate the code by keeping track of which memcgs were the ones that actually enabled limits, and which ones got it from its parents. It is a lot simpler just to do static_key_slow_inc() on every child that is accounted. Signed-off-by: Glauber Costa Acked-by: Michal Hocko Acked-by: Kamezawa Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Johannes Weiner Cc: Suleiman Souhlal Cc: Tejun Heo Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: JoonSoo Kim Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index afa2ad40457e..87d61e840ddd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -22,6 +22,7 @@ #include #include #include +#include struct mem_cgroup; struct page_cgroup; @@ -417,9 +418,10 @@ static inline void sock_release_memcg(struct sock *sk) #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ #ifdef CONFIG_MEMCG_KMEM +extern struct static_key memcg_kmem_enabled_key; static inline bool memcg_kmem_enabled(void) { - return true; + return static_key_false(&memcg_kmem_enabled_key); } /* -- cgit v1.2.3 From 2ad306b17c0ac5a1b1f250d5f772aeb87fdf1eba Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:18 -0800 Subject: fork: protect architectures where THREAD_SIZE >= PAGE_SIZE against fork bombs Because those architectures will draw their stacks directly from the page allocator, rather than the slab cache, we can directly pass __GFP_KMEMCG flag, and issue the corresponding free_pages. This code path is taken when the architecture doesn't define CONFIG_ARCH_THREAD_INFO_ALLOCATOR (only ia64 seems to), and has THREAD_SIZE >= PAGE_SIZE. Luckily, most - if not all - of the remaining architectures fall in this category. This will guarantee that every stack page is accounted to the memcg the process currently lives on, and will have the allocations to fail if they go over limit. For the time being, I am defining a new variant of THREADINFO_GFP, not to mess with the other path. Once the slab is also tracked by memcg, we can get rid of that flag. Tested to successfully protect against :(){ :|:& };: Signed-off-by: Glauber Costa Acked-by: Frederic Weisbecker Acked-by: Kamezawa Hiroyuki Reviewed-by: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: Mel Gorman Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ccc1899bd62e..e7e04736802f 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm); # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) #endif +#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG) + /* * flag set/clear/test wrappers * - pass TIF_xxxx constants to these functions -- cgit v1.2.3 From ba6c496ed834a37a26fc6fc87fc9aecb0fa0014d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:27 -0800 Subject: slab/slub: struct memcg_params For the kmem slab controller, we need to record some extra information in the kmem_cache structure. Signed-off-by: Glauber Costa Signed-off-by: Suleiman Souhlal Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 24 ++++++++++++++++++++++++ include/linux/slab_def.h | 3 +++ include/linux/slub_def.h | 3 +++ 3 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 743a10415122..00efba149222 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -176,6 +176,30 @@ void kmem_cache_free(struct kmem_cache *, void *); #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif +/* + * This is the main placeholder for memcg-related information in kmem caches. + * struct kmem_cache will hold a pointer to it, so the memory cost while + * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it + * would otherwise be if that would be bundled in kmem_cache: we'll need an + * extra pointer chase. But the trade off clearly lays in favor of not + * penalizing non-users. + * + * Both the root cache and the child caches will have it. For the root cache, + * this will hold a dynamically allocated array large enough to hold + * information about the currently limited memcgs in the system. + * + * Child caches will hold extra metadata needed for its operation. Fields are: + * + * @memcg: pointer to the memcg this cache belongs to + */ +struct memcg_cache_params { + bool is_root_cache; + union { + struct kmem_cache *memcg_caches[0]; + struct mem_cgroup *memcg; + }; +}; + /* * Common kmalloc functions provided by all allocators */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 45c0356fdc8c..8bb6e0eaf3c6 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -81,6 +81,9 @@ struct kmem_cache { */ int obj_offset; #endif /* CONFIG_DEBUG_SLAB */ +#ifdef CONFIG_MEMCG_KMEM + struct memcg_cache_params *memcg_params; +#endif /* 6) per-cpu/per-node data, touched during every alloc/free */ /* diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index df448adb7283..961e72eab907 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -101,6 +101,9 @@ struct kmem_cache { #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif +#ifdef CONFIG_MEMCG_KMEM + struct memcg_cache_params *memcg_params; +#endif #ifdef CONFIG_NUMA /* -- cgit v1.2.3 From 2633d7a028239a738b793be5ca8fa6ac312f5793 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:34 -0800 Subject: slab/slub: consider a memcg parameter in kmem_create_cache Allow a memcg parameter to be passed during cache creation. When the slub allocator is being used, it will only merge caches that belong to the same memcg. We'll do this by scanning the global list, and then translating the cache to a memcg-specific cache Default function is created as a wrapper, passing NULL to the memcg version. We only merge caches that belong to the same memcg. A helper is provided, memcg_css_id: because slub needs a unique cache name for sysfs. Since this is visible, but not the canonical location for slab data, the cache name is not used, the css_id should suffice. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 26 ++++++++++++++++++++++++++ include/linux/slab.h | 14 +++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 87d61e840ddd..0b69a0470007 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -28,6 +28,7 @@ struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +struct kmem_cache; /* Stats that can be updated by kernel. */ enum mem_cgroup_page_stat_item { @@ -441,6 +442,11 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order); void __memcg_kmem_uncharge_pages(struct page *page, int order); +int memcg_cache_id(struct mem_cgroup *memcg); +int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s); +void memcg_release_cache(struct kmem_cache *cachep); +void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); + /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. @@ -525,6 +531,26 @@ static inline void memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) { } + +static inline int memcg_cache_id(struct mem_cgroup *memcg) +{ + return -1; +} + +static inline int memcg_register_cache(struct mem_cgroup *memcg, + struct kmem_cache *s) +{ + return 0; +} + +static inline void memcg_release_cache(struct kmem_cache *cachep) +{ +} + +static inline void memcg_cache_list_add(struct mem_cgroup *memcg, + struct kmem_cache *s) +{ +} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 00efba149222..c0fcf28c15b2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,6 +116,7 @@ struct kmem_cache { }; #endif +struct mem_cgroup; /* * struct kmem_cache related prototypes */ @@ -125,6 +126,9 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); +struct kmem_cache * +kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, + unsigned long, void (*)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); @@ -191,15 +195,23 @@ void kmem_cache_free(struct kmem_cache *, void *); * Child caches will hold extra metadata needed for its operation. Fields are: * * @memcg: pointer to the memcg this cache belongs to + * @list: list_head for the list of all caches in this memcg + * @root_cache: pointer to the global, root cache, this cache was derived from */ struct memcg_cache_params { bool is_root_cache; union { struct kmem_cache *memcg_caches[0]; - struct mem_cgroup *memcg; + struct { + struct mem_cgroup *memcg; + struct list_head list; + struct kmem_cache *root_cache; + }; }; }; +int memcg_update_all_caches(int num_memcgs); + /* * Common kmalloc functions provided by all allocators */ -- cgit v1.2.3 From 55007d849759252ddd573aeb36143b947202d509 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:38 -0800 Subject: memcg: allocate memory for memcg caches whenever a new memcg appears Every cache that is considered a root cache (basically the "original" caches, tied to the root memcg/no-memcg) will have an array that should be large enough to store a cache pointer per each memcg in the system. Theoreticaly, this is as high as 1 << sizeof(css_id), which is currently in the 64k pointers range. Most of the time, we won't be using that much. What goes in this patch, is a simple scheme to dynamically allocate such an array, in order to minimize memory usage for memcg caches. Because we would also like to avoid allocations all the time, at least for now, the array will only grow. It will tend to be big enough to hold the maximum number of kmem-limited memcgs ever achieved. We'll allocate it to be a minimum of 64 kmem-limited memcgs. When we have more than that, we'll start doubling the size of this array every time the limit is reached. Because we are only considering kmem limited memcgs, a natural point for this to happen is when we write to the limit. At that point, we already have set_limit_mutex held, so that will become our natural synchronization mechanism. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0b69a0470007..45085e14e023 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -447,6 +447,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s); void memcg_release_cache(struct kmem_cache *cachep); void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); +int memcg_update_cache_size(struct kmem_cache *s, int num_groups); +void memcg_update_array_size(int num_groups); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. -- cgit v1.2.3 From d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:40 -0800 Subject: memcg: infrastructure to match an allocation to the right cache The page allocator is able to bind a page to a memcg when it is allocated. But for the caches, we'd like to have as many objects as possible in a page belonging to the same cache. This is done in this patch by calling memcg_kmem_get_cache in the beginning of every allocation function. This function is patched out by static branches when kernel memory controller is not being used. It assumes that the task allocating, which determines the memcg in the page allocator, belongs to the same cgroup throughout the whole process. Misaccounting can happen if the task calls memcg_kmem_get_cache() while belonging to a cgroup, and later on changes. This is considered acceptable, and should only happen upon task migration. Before the cache is created by the memcg core, there is also a possible imbalance: the task belongs to a memcg, but the cache being allocated from is the global cache, since the child cache is not yet guaranteed to be ready. This case is also fine, since in this case the GFP_KMEMCG will not be passed and the page allocator will not attempt any cgroup accounting. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 45085e14e023..bd9b5d73bc2b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -449,6 +449,10 @@ void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); int memcg_update_cache_size(struct kmem_cache *s, int num_groups); void memcg_update_array_size(int num_groups); + +struct kmem_cache * +__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); + /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. @@ -518,6 +522,37 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) __memcg_kmem_commit_charge(page, memcg, order); } +/** + * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation + * @cachep: the original global kmem cache + * @gfp: allocation flags. + * + * This function assumes that the task allocating, which determines the memcg + * in the page allocator, belongs to the same cgroup throughout the whole + * process. Misacounting can happen if the task calls memcg_kmem_get_cache() + * while belonging to a cgroup, and later on changes. This is considered + * acceptable, and should only happen upon task migration. + * + * Before the cache is created by the memcg core, there is also a possible + * imbalance: the task belongs to a memcg, but the cache being allocated from + * is the global cache, since the child cache is not yet guaranteed to be + * ready. This case is also fine, since in this case the GFP_KMEMCG will not be + * passed and the page allocator will not attempt any cgroup accounting. + */ +static __always_inline struct kmem_cache * +memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) +{ + if (!memcg_kmem_enabled()) + return cachep; + if (gfp & __GFP_NOFAIL) + return cachep; + if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) + return cachep; + if (unlikely(fatal_signal_pending(current))) + return cachep; + + return __memcg_kmem_get_cache(cachep, gfp); +} #else static inline bool memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) @@ -553,6 +588,12 @@ static inline void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *s) { } + +static inline struct kmem_cache * +memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) +{ + return cachep; +} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 0e9d92f2d02d8c8320f0502307c688d07bdac2b3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:42 -0800 Subject: memcg: skip memcg kmem allocations in specified code regions Create a mechanism that skip memcg allocations during certain pieces of our core code. It basically works in the same way as preempt_disable()/preempt_enable(): By marking a region under which all allocations will be accounted to the root memcg. We need this to prevent races in early cache creation, when we allocate data using caches that are not necessarily created already. Signed-off-by: Glauber Costa yCc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9914c662ed7b..f712465b05c5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1597,6 +1597,7 @@ struct task_struct { unsigned long nr_pages; /* uncharged usage */ unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; + unsigned int memcg_kmem_skip_account; #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT atomic_t ptrace_bp_refcnt; -- cgit v1.2.3 From b9ce5ef49f00daf2254c6953c8d31f79aabccd34 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:46 -0800 Subject: sl[au]b: always get the cache from its page in kmem_cache_free() struct page already has this information. If we start chaining caches, this information will always be more trustworthy than whatever is passed into the function. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bd9b5d73bc2b..2298122e71ad 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -554,6 +554,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) return __memcg_kmem_get_cache(cachep, gfp); } #else +static inline bool memcg_kmem_enabled(void) +{ + return false; +} + static inline bool memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) { -- cgit v1.2.3 From d79923fad95b0cdf7770e024677180c734cb7148 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:48 -0800 Subject: sl[au]b: allocate objects from memcg cache We are able to match a cache allocation to a particular memcg. If the task doesn't change groups during the allocation itself - a rare event, this will give us a good picture about who is the first group to touch a cache page. This patch uses the now available infrastructure by calling memcg_kmem_get_cache() before all the cache allocations. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 961e72eab907..364ba6c9fe21 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -225,7 +225,10 @@ void *__kmalloc(size_t size, gfp_t flags); static __always_inline void * kmalloc_order(size_t size, gfp_t flags, unsigned int order) { - void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + void *ret; + + flags |= (__GFP_COMP | __GFP_KMEMCG); + ret = (void *) __get_free_pages(flags, order); kmemleak_alloc(ret, size, 1, flags); return ret; } -- cgit v1.2.3 From 1f458cbf122288b23620ee822e19bcbb76c8d6ec Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:50 -0800 Subject: memcg: destroy memcg caches Implement destruction of memcg caches. Right now, only caches where our reference counter is the last remaining are deleted. If there are any other reference counters around, we just leave the caches lying around until they go away. When that happens, a destruction function is called from the cache code. Caches are only destroyed in process context, so we queue them up for later processing in the general case. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ include/linux/slab.h | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2298122e71ad..79fcf0cd7186 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -453,6 +453,8 @@ void memcg_update_array_size(int num_groups); struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); +void mem_cgroup_destroy_cache(struct kmem_cache *cachep); + /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. diff --git a/include/linux/slab.h b/include/linux/slab.h index c0fcf28c15b2..869efb8d2377 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -11,6 +11,8 @@ #include #include +#include + /* * Flags to pass to kmem_cache_create(). @@ -179,7 +181,6 @@ void kmem_cache_free(struct kmem_cache *, void *); #ifndef ARCH_SLAB_MINALIGN #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif - /* * This is the main placeholder for memcg-related information in kmem caches. * struct kmem_cache will hold a pointer to it, so the memory cost while @@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *); * @memcg: pointer to the memcg this cache belongs to * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from + * @dead: set to true after the memcg dies; the cache may still be around. + * @nr_pages: number of pages that belongs to this cache. + * @destroy: worker to be called whenever we are ready, or believe we may be + * ready, to destroy this cache. */ struct memcg_cache_params { bool is_root_cache; @@ -206,6 +211,9 @@ struct memcg_cache_params { struct mem_cgroup *memcg; struct list_head list; struct kmem_cache *root_cache; + bool dead; + atomic_t nr_pages; + struct work_struct destroy; }; }; }; -- cgit v1.2.3 From 7cf2798240a2a2230cb16a391beef98d8a7ad362 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:22:55 -0800 Subject: memcg/sl[au]b: track all the memcg children of a kmem_cache This enables us to remove all the children of a kmem_cache being destroyed, if for example the kernel module it's being used in gets unloaded. Otherwise, the children will still point to the destroyed parent. Signed-off-by: Suleiman Souhlal Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 79fcf0cd7186..e119f3ef793c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -454,6 +454,7 @@ struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); void mem_cgroup_destroy_cache(struct kmem_cache *cachep); +void kmem_cache_destroy_memcg_children(struct kmem_cache *s); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. @@ -601,6 +602,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { return cachep; } + +static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s) +{ +} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 749c54151a6e5b229e4ae067dbc651e54b161fbc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:23:01 -0800 Subject: memcg: aggregate memcg cache values in slabinfo When we create caches in memcgs, we need to display their usage information somewhere. We'll adopt a scheme similar to /proc/meminfo, with aggregate totals shown in the global file, and per-group information stored in the group itself. For the time being, only reads are allowed in the per-group cache. Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++++++ include/linux/slab.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e119f3ef793c..8dc7c746b44f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -420,6 +420,11 @@ static inline void sock_release_memcg(struct sock *sk) #ifdef CONFIG_MEMCG_KMEM extern struct static_key memcg_kmem_enabled_key; + +extern int memcg_limited_groups_array_size; +#define for_each_memcg_cache_index(_idx) \ + for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++) + static inline bool memcg_kmem_enabled(void) { return static_key_false(&memcg_kmem_enabled_key); @@ -557,6 +562,9 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) return __memcg_kmem_get_cache(cachep, gfp); } #else +#define for_each_memcg_cache_index(_idx) \ + for (; NULL; ) + static inline bool memcg_kmem_enabled(void) { return false; diff --git a/include/linux/slab.h b/include/linux/slab.h index 869efb8d2377..b9278663f22a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -220,6 +220,10 @@ struct memcg_cache_params { int memcg_update_all_caches(int num_memcgs); +struct seq_file; +int cache_show(struct kmem_cache *s, struct seq_file *m); +void print_slabinfo_header(struct seq_file *m); + /* * Common kmalloc functions provided by all allocators */ -- cgit v1.2.3 From 943a451a87d229ca564a27274b58eaeae35fde5d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:23:03 -0800 Subject: slab: propagate tunable values SLAB allows us to tune a particular cache behavior with tunables. When creating a new memcg cache copy, we'd like to preserve any tunables the parent cache already had. This could be done by an explicit call to do_tune_cpucache() after the cache is created. But this is not very convenient now that the caches are created from common code, since this function is SLAB-specific. Another method of doing that is taking advantage of the fact that do_tune_cpucache() is always called from enable_cpucache(), which is called at cache initialization. We can just preset the values, and then things work as expected. It can also happen that a root cache has its tunables updated during normal system operation. In this case, we will propagate the change to all caches that are already active. This change will require us to move the assignment of root_cache in memcg_params a bit earlier. We need this to be already set - which memcg_kmem_register_cache will do - when we reach __kmem_cache_create() Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 +++++--- include/linux/slab.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8dc7c746b44f..ea02ff970836 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -448,7 +448,8 @@ void __memcg_kmem_commit_charge(struct page *page, void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); -int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s); +int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache); void memcg_release_cache(struct kmem_cache *cachep); void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); @@ -590,8 +591,9 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline int memcg_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *s) +static inline int +memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache) { return 0; } diff --git a/include/linux/slab.h b/include/linux/slab.h index b9278663f22a..5d168d7e0a28 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -130,7 +130,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void (*)(void *)); struct kmem_cache * kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, - unsigned long, void (*)(void *)); + unsigned long, void (*)(void *), struct kmem_cache *); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); -- cgit v1.2.3 From 107dab5c92d5f9c3afe962036e47c207363255c7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:23:05 -0800 Subject: slub: slub-specific propagation changes SLUB allows us to tune a particular cache behavior with sysfs-based tunables. When creating a new memcg cache copy, we'd like to preserve any tunables the parent cache already had. This can be done by tapping into the store attribute function provided by the allocator. We of course don't need to mess with read-only fields. Since the attributes can have multiple types and are stored internally by sysfs, the best strategy is to issue a ->show() in the root cache, and then ->store() in the memcg cache. The drawback of that, is that sysfs can allocate up to a page in buffering for show(), that we are likely not to need, but also can't guarantee. To avoid always allocating a page for that, we can update the caches at store time with the maximum attribute size ever stored to the root cache. We will then get a buffer big enough to hold it. The corolary to this, is that if no stores happened, nothing will be propagated. It can also happen that a root cache has its tunables updated during normal system operation. In this case, we will propagate the change to all caches that are already active. [akpm@linux-foundation.org: tweak code to avoid __maybe_unused] Signed-off-by: Glauber Costa Cc: Christoph Lameter Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: Johannes Weiner Cc: JoonSoo Kim Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Rik van Riel Cc: Suleiman Souhlal Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 364ba6c9fe21..9db4825cd393 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -103,6 +103,7 @@ struct kmem_cache { #endif #ifdef CONFIG_MEMCG_KMEM struct memcg_cache_params *memcg_params; + int max_attr_size; /* for propagation, maximum size of a stored attr */ #endif #ifdef CONFIG_NUMA -- cgit v1.2.3 From ebe945c27628fca03723582eba138acc2e2f3d15 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 18 Dec 2012 14:23:10 -0800 Subject: memcg: add comments clarifying aspects of cache attribute propagation This patch clarifies two aspects of cache attribute propagation. First, the expected context for the for_each_memcg_cache macro in memcontrol.h. The usages already in the codebase are safe. In mm/slub.c, it is trivially safe because the lock is acquired right before the loop. In mm/slab.c, it is less so: the lock is acquired by an outer function a few steps back in the stack, so a VM_BUG_ON() is added to make sure it is indeed safe. A comment is also added to detail why we are returning the value of the parent cache and ignoring the children's when we propagate the attributes. Signed-off-by: Glauber Costa Cc: Michal Hocko Cc: Kamezawa Hiroyuki Cc: Johannes Weiner Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ea02ff970836..0108a56f814e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -422,6 +422,12 @@ static inline void sock_release_memcg(struct sock *sk) extern struct static_key memcg_kmem_enabled_key; extern int memcg_limited_groups_array_size; + +/* + * Helper macro to loop through all memcg-specific caches. Callers must still + * check if the cache is valid (it is either valid or NULL). + * the slab_mutex must be held when looping through those caches + */ #define for_each_memcg_cache_index(_idx) \ for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++) -- cgit v1.2.3 From 7179e7bf4592ac5a7b30257a7df6259ee81e51da Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Tue, 18 Dec 2012 14:23:19 -0800 Subject: mm/hugetlb: create hugetlb cgroup file in hugetlb_init Build kernel with CONFIG_HUGETLBFS=y,CONFIG_HUGETLB_PAGE=y and CONFIG_CGROUP_HUGETLB=y, then specify hugepagesz=xx boot option, system will fail to boot. This failure is caused by following code path: setup_hugepagesz hugetlb_add_hstate hugetlb_cgroup_file_init cgroup_add_cftypes kzalloc <--slab is *not available* yet For this path, slab is not available yet, so memory allocated will be failed, and cause WARN_ON() in hugetlb_cgroup_file_init(). So I move hugetlb_cgroup_file_init() into hugetlb_init(). [akpm@linux-foundation.org: tweak coding-style, remove pointless __init on inlined function] [akpm@linux-foundation.org: fix warning] Signed-off-by: Jianguo Wu Signed-off-by: Jiang Liu Reviewed-by: Aneesh Kumar K.V Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index d73878c694b3..ce8217f7b5c2 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page); extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); -extern int hugetlb_cgroup_file_init(int idx) __init; +extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage); @@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, return; } -static inline int __init hugetlb_cgroup_file_init(int idx) +static inline void hugetlb_cgroup_file_init(void) { - return 0; } static inline void hugetlb_cgroup_migrate(struct page *oldhpage, -- cgit v1.2.3 From 59771079c18c44e39106f0f30054025acafadb41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 19 Dec 2012 07:18:35 -0800 Subject: blk: avoid divide-by-zero with zero discard granularity Commit 8dd2cb7e880d ("block: discard granularity might not be power of 2") changed a couple of 'binary and' operations into modulus operations. Which turned the harmless case of a zero discard_granularity into a possible divide-by-zero. The code also had a much more subtle bug: it was doing the modulus of a value in bytes using 'sector_t'. That was always conceptually wrong, but didn't actually matter back when the code assumed a power-of-two granularity: we only looked at the low bits anyway. But with potentially arbitrary sector numbers, using a 'sector_t' to express bytes is very very wrong: depending on configuration it limits the starting offset of the device to just 32 bits, and any overflow would result in a wrong value if the modulus wasn't a power-of-two. So re-write the code to not only protect against the divide-by-zero, but to do the starting sector arithmetic in sectors, and using the proper types. [ For any mathematicians out there: it also looks monumentally stupid to do the 'modulo granularity' operation *twice*, never mind having a "+ granularity" in the second modulus op. But that's the easiest way to avoid negative values or overflow, and it is how the original code was done. ] Reported-by: Ingo Molnar Reported-by: Doug Anderson Cc: Neil Brown Cc: Shaohua Li Acked-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acb4f7bbbd32..f94bc83011ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1188,14 +1188,25 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - sector_t alignment = sector << 9; - alignment = sector_div(alignment, lim->discard_granularity); + unsigned int alignment, granularity, offset; if (!lim->max_discard_sectors) return 0; - alignment = lim->discard_granularity + lim->discard_alignment - alignment; - return sector_div(alignment, lim->discard_granularity); + /* Why are these in bytes, not sectors? */ + alignment = lim->discard_alignment >> 9; + granularity = lim->discard_granularity >> 9; + if (!granularity) + return 0; + + /* Offset of the partition start in 'granularity' sectors */ + offset = sector_div(sector, granularity); + + /* And why do we do this modulus *again* in blkdev_issue_discard()? */ + offset = (granularity + alignment - offset) % granularity; + + /* Turn it back into bytes, gaah */ + return offset << 9; } static inline int bdev_discard_alignment(struct block_device *bdev) -- cgit v1.2.3 From ab28698d33af05abab0bcf8021eafe38f7434f24 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Wed, 19 Dec 2012 09:10:09 -0600 Subject: of: define struct device in of_platform.h if !OF_DEVICE and !OF_ADDRESS Fixes the following warning: include/linux/of_platform.h:106:13: warning: 'struct device' declared inside parameter list [enabled by default] include/linux/of_platform.h:106:13: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring Signed-off-by: Grant Likely --- include/linux/of_platform.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index b47d2040c9f2..3863a4dbdf18 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -100,6 +100,7 @@ extern int of_platform_populate(struct device_node *root, #if !defined(CONFIG_OF_ADDRESS) struct of_dev_auxdata; +struct device; static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, -- cgit v1.2.3 From 3c439b5586e9200f7e6287ee77c175c4d5b0eeed Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 6 Dec 2012 17:12:00 +0000 Subject: mlx4_core: Allow choosing flow steering mode Device managed flow steering will be enabled only under administrator directive provided through setting the existing module parameter log_num_mgm_entry_size to -1 (if the device actually supports flow steering). If flow steering isn't requested or not available, the driver will use the value of log_num_mgm_entry_size and B0 steering. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 21821da2abfd..20ea939c22a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -625,6 +625,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; -- cgit v1.2.3 From a1c088e01b71d90852b0df5a77cdae46bd0e0c05 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 18 Dec 2012 04:45:29 +0000 Subject: usbnet: handle PM failure gracefully If a device fails to do remote wakeup, this is no reason to abort an open totally. This patch just continues without runtime PM. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9bbeabf66c54..288b32aadab2 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -69,6 +69,7 @@ struct usbnet { # define EVENT_DEV_ASLEEP 6 # define EVENT_DEV_OPEN 7 # define EVENT_DEVICE_REPORT_IDLE 8 +# define EVENT_NO_RUNTIME_PM 9 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From 2dd7c8cf29769f6b66f26b501db2364640c2c9d0 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 18 Dec 2012 04:45:52 +0000 Subject: usbnet: generic manage_power() Centralise common code for manage_power() in usbnet by making a generic simple implementation Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 288b32aadab2..bd45eb7bedc8 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -241,4 +241,6 @@ extern void usbnet_set_msglevel(struct net_device *, u32); extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); extern int usbnet_nway_reset(struct net_device *net); +extern int usbnet_manage_power(struct usbnet *, int); + #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From cf13a84d174947df4bb809edfb4887393642303e Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 5 Oct 2012 12:16:09 +0200 Subject: watchdog: WatchDog Timer Driver Core: fix comment Signed-off-by: Fabio Porcedda Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 87490ac4bd87..3a9df2f43be6 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -129,7 +129,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) return wdd->driver_data; } -/* drivers/watchdog/core/watchdog_core.c */ +/* drivers/watchdog/watchdog_core.c */ extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); -- cgit v1.2.3 From 468366138850f20543f1d4878028900672b23dae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Nov 2012 09:12:59 -0500 Subject: COMPAT_SYSCALL_DEFINE: infrastructure Signed-off-by: Al Viro --- include/linux/compat.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 784ebfe63c48..a7877fa809fd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -23,6 +23,48 @@ #define COMPAT_USE_64BIT_TIME 0 #endif +#ifndef __SC_DELOUSE +#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) +#endif + +#define __SC_CCAST1(t1, a1) __SC_DELOUSE(t1,a1) +#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__) +#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__) +#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__) +#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__) +#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE1(name, ...) \ + COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE2(name, ...) \ + COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE3(name, ...) \ + COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE4(name, ...) \ + COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE5(name, ...) \ + COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE6(name, ...) \ + COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) + +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__)) \ + { \ + return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__)); \ + } \ + SYSCALL_ALIAS(compat_sys##name, compat_SyS##name); \ + static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)) + +#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)) + +#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) -- cgit v1.2.3 From ae903caae267154de7cf8576b130ff474630596b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Dec 2012 12:44:11 -0500 Subject: Bury the conditionals from kernel_thread/kernel_execve series All architectures have CONFIG_GENERIC_KERNEL_THREAD CONFIG_GENERIC_KERNEL_EXECVE __ARCH_WANT_SYS_EXECVE None of them have __ARCH_WANT_KERNEL_EXECVE and there are only two callers of kernel_execve() (which is a trivial wrapper for do_execve() now) left. Kill the conditionals and make both callers use do_execve(). Signed-off-by: Al Viro --- include/linux/binfmts.h | 4 ---- include/linux/sched.h | 2 -- include/linux/syscalls.h | 9 --------- 3 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2630c9b41a86..8c1388c6ae27 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -121,8 +121,4 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); -#ifdef __ARCH_WANT_KERNEL_EXECVE -extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn; -#endif - #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 1162258bcaf0..9e5a54e3d84f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,9 +2291,7 @@ extern int do_execve(const char *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); -#ifdef CONFIG_GENERIC_KERNEL_THREAD extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -#endif extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91835e7f364d..9fe5f946526e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -827,15 +827,6 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, const char __user *pathname); asmlinkage long sys_syncfs(int fd); -#ifndef CONFIG_GENERIC_KERNEL_EXECVE -int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); -#else -#define kernel_execve(filename, argv, envp) \ - do_execve(filename, \ - (const char __user *const __user *)argv, \ - (const char __user *const __user *)envp) -#endif - asmlinkage long sys_fork(void); asmlinkage long sys_vfork(void); #ifdef CONFIG_CLONE_BACKWARDS -- cgit v1.2.3 From 1ca97bb541a1f5a735e697a8bba763cde3aab452 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 18 Nov 2012 12:50:10 -0500 Subject: new helper: current_user_stack_pointer() Cross-architecture equivalent of rdusp(); default is user_stack_pointer(current_pt_regs()) - that works for almost all platforms that have usp saved in pt_regs. The only exception from that is ia64 - we want memory stack, not the backing store for register one. Signed-off-by: Al Viro --- include/linux/ptrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a89ff04bddd9..a3a9d085f932 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -342,6 +342,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk, #define signal_pt_regs() task_pt_regs(current) #endif +#ifndef current_user_stack_pointer +#define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) +#endif + extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -- cgit v1.2.3 From 5c49574ffd7ac07eae8c3b065d19e6ebc7e4760f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 18 Nov 2012 15:29:16 -0500 Subject: new helper: restore_altstack() to be used by rt_sigreturn instances Signed-off-by: Al Viro --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index e19a011b43b7..5969522136fe 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -385,4 +385,6 @@ int unhandled_signal(struct task_struct *tsk, int sig); void signals_init(void); +int restore_altstack(const stack_t __user *); + #endif /* _LINUX_SIGNAL_H */ -- cgit v1.2.3 From 9b064fc3f95a8e44e929fdf4d6037334ea03d15b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Dec 2012 13:49:35 -0500 Subject: new helper: compat_user_stack_pointer() Compat counterpart of current_user_stack_pointer(); for most of the biarch architectures those two are identical, but e.g. arm64 and arm use different registers for stack pointer... Note that amd64 variants of current_user_stack_pointer/compat_user_stack_pointer do *not* rely on pt_regs having been through FIXUP_TOP_OF_STACK. Signed-off-by: Al Viro --- include/linux/compat.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index a7877fa809fd..62bb76f91baf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -65,6 +65,9 @@ #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ +#ifndef compat_user_stack_pointer +#define compat_user_stack_pointer() current_user_stack_pointer() +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) -- cgit v1.2.3 From 6bf9adfc90370b695cb111116e15fdc0e1906270 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Dec 2012 14:09:47 -0500 Subject: introduce generic sys_sigaltstack(), switch x86 and um to it Conditional on CONFIG_GENERIC_SIGALTSTACK; architectures that do not select it are completely unaffected Signed-off-by: Al Viro --- include/linux/syscalls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9fe5f946526e..6ca1e08210c6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -63,6 +63,7 @@ struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; struct file_handle; +struct sigaltstack; #include #include @@ -299,6 +300,11 @@ asmlinkage long sys_personality(unsigned int personality); asmlinkage long sys_sigpending(old_sigset_t __user *set); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); +#ifdef CONFIG_GENERIC_SIGALTSTACK +asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, + struct sigaltstack __user *uoss); +#endif + asmlinkage long sys_getitimer(int which, struct itimerval __user *value); asmlinkage long sys_setitimer(int which, struct itimerval __user *value, -- cgit v1.2.3 From 9026843952adac5b123c7b8dc961e5c15828d9e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Dec 2012 14:47:53 -0500 Subject: generic compat_sys_sigaltstack() Again, conditional on CONFIG_GENERIC_SIGALTSTACK Signed-off-by: Al Viro --- include/linux/compat.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 62bb76f91baf..cb5637e2ee2c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -68,6 +68,16 @@ #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif +#ifdef CONFIG_GENERIC_SIGALTSTACK +#ifndef compat_sigaltstack /* we'll need that for MIPS */ +typedef struct compat_sigaltstack { + compat_uptr_t ss_sp; + int ss_flags; + compat_size_t ss_size; +} compat_stack_t; +#endif +#endif + #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) @@ -632,6 +642,12 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); +#ifdef CONFIG_GENERIC_SIGALTSTACK +asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr); + +int compat_restore_altstack(const compat_stack_t __user *uss); +#endif #else -- cgit v1.2.3 From c40702c49faef05ae324f121d8b3e215244ee152 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Nov 2012 14:24:26 -0500 Subject: new helpers: __save_altstack/__compat_save_altstack, switch x86 and um to those note that they are relying on access_ok() already checked by caller. Signed-off-by: Al Viro --- include/linux/compat.h | 1 + include/linux/signal.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index cb5637e2ee2c..334813307ec1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -647,6 +647,7 @@ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); int compat_restore_altstack(const compat_stack_t __user *uss); +int __compat_save_altstack(compat_stack_t __user *, unsigned long); #endif #else diff --git a/include/linux/signal.h b/include/linux/signal.h index 5969522136fe..0a89ffc48466 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -386,5 +386,6 @@ int unhandled_signal(struct task_struct *tsk, int sig); void signals_init(void); int restore_altstack(const stack_t __user *); +int __save_altstack(stack_t __user *, unsigned long); #endif /* _LINUX_SIGNAL_H */ -- cgit v1.2.3 From ada65c74059f8c104f1b467c126205471634c435 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 12 Dec 2012 10:23:03 +0100 Subject: dma-buf: remove fallback for !CONFIG_DMA_SHARED_BUFFER Documentation says that code requiring dma-buf should add it to select, so inline fallbacks are not going to be used. A link error will make it obvious what went wrong, instead of silently doing nothing at runtime. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Reviewed-by: Rob Clark Signed-off-by: Sumit Semwal --- include/linux/dma-buf.h | 99 ------------------------------------------------- 1 file changed, 99 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index eb48f3816df9..bd2e52ccc4f2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -156,7 +156,6 @@ static inline void get_dma_buf(struct dma_buf *dmabuf) get_file(dmabuf->file); } -#ifdef CONFIG_DMA_SHARED_BUFFER struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); void dma_buf_detach(struct dma_buf *dmabuf, @@ -184,103 +183,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); void *dma_buf_vmap(struct dma_buf *); void dma_buf_vunmap(struct dma_buf *, void *vaddr); -#else - -static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, - struct device *dev) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *dmabuf_attach) -{ - return; -} - -static inline struct dma_buf *dma_buf_export(void *priv, - const struct dma_buf_ops *ops, - size_t size, int flags) -{ - return ERR_PTR(-ENODEV); -} - -static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags) -{ - return -ENODEV; -} - -static inline struct dma_buf *dma_buf_get(int fd) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_put(struct dma_buf *dmabuf) -{ - return; -} - -static inline struct sg_table *dma_buf_map_attachment( - struct dma_buf_attachment *attach, enum dma_data_direction write) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, - struct sg_table *sg, enum dma_data_direction dir) -{ - return; -} - -static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, - size_t start, size_t len, - enum dma_data_direction dir) -{ - return -ENODEV; -} - -static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf, - size_t start, size_t len, - enum dma_data_direction dir) -{ -} - -static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf, - unsigned long pnum) -{ - return NULL; -} - -static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf, - unsigned long pnum, void *vaddr) -{ -} - -static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum) -{ - return NULL; -} - -static inline void dma_buf_kunmap(struct dma_buf *dmabuf, - unsigned long pnum, void *vaddr) -{ -} - -static inline int dma_buf_mmap(struct dma_buf *dmabuf, - struct vm_area_struct *vma, - unsigned long pgoff) -{ - return -ENODEV; -} - -static inline void *dma_buf_vmap(struct dma_buf *dmabuf) -{ - return NULL; -} - -static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) -{ -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 28 Nov 2012 11:30:53 -0500 Subject: vfs: remove DCACHE_NEED_LOOKUP The code that relied on that flag was ripped out of btrfs quite some time ago, and never added back. Josef indicated that he was going to take a different approach to the problem in btrfs, and that we could just eliminate this flag. Cc: Josef Bacik Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/dcache.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59200795482e..c1754b59ddd3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -202,7 +202,6 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ -#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) @@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } -static inline bool d_need_lookup(struct dentry *dentry) -{ - return dentry->d_flags & DCACHE_NEED_LOOKUP; -} - -extern void d_clear_need_lookup(struct dentry *dentry); - extern int sysctl_vfs_cache_pressure; #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3 From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:32 +0000 Subject: CacheFiles: Fix the marking of cached pages Under some circumstances CacheFiles defers the marking of pages with PG_fscache so that it can take advantage of pagevecs to reduce the number of calls to fscache_mark_pages_cached() and the netfs's hook to keep track of this. There are, however, two problems with this: (1) It can lead to the PG_fscache mark being applied _after_ the page is set PG_uptodate and unlocked (by the call to fscache_end_io()). (2) CacheFiles's ref on the page is dropped immediately following fscache_end_io() - and so may not still be held when the mark is applied. This can lead to the page being passed back to the allocator before the mark is applied. Fix this by, where appropriate, marking the page before calling fscache_end_io() and releasing the page. This means that we can't take advantage of pagevecs and have to make a separate call for each page to the marking routines. The symptoms of this are Bad Page state errors cropping up under memory pressure, for example: BUG: Bad page state in process tar pfn:002da page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447 page flags: 0x1000(private_2) Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064 Call Trace: [] ? dump_page+0xb9/0xbe [] bad_page+0xd5/0xea [] get_page_from_freelist+0x35b/0x46a [] __alloc_pages_nodemask+0x362/0x662 [] __do_page_cache_readahead+0x13a/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] ? ondemand_readahead+0x163/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? security_file_permission+0x7b/0x84 [] ? rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b As can be seen, PG_private_2 (== PG_fscache) is set in the page flags. Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was set appropriately showed that sometimes it wasn't. This led to the discovery that sometimes the page has apparently been reclaimed by the time the marker got to see it. Reported-by: M. Stevens Signed-off-by: David Howells Reviewed-by: Jeff Layton --- include/linux/fscache-cache.h | 3 +++ include/linux/fscache.h | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce31408b1e47..9879183b55d8 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_io_error(struct fscache_cache *cache); +extern void fscache_mark_page_cached(struct fscache_retrieval *op, + struct page *page); + extern void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9ec20dec3353..f4b6353543bf 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -135,14 +135,14 @@ struct fscache_cookie_def { */ void (*put_context)(void *cookie_netfs_data, void *context); - /* indicate pages that now have cache metadata retained - * - this function should mark the specified pages as now being cached - * - the pages will have been marked with PG_fscache before this is + /* indicate page that now have cache metadata retained + * - this function should mark the specified page as now being cached + * - the page will have been marked with PG_fscache before this is * called, so this is optional */ - void (*mark_pages_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct pagevec *cached_pvec); + void (*mark_page_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct page *page); /* indicate the cookie is no longer cached * - this function is called when the backing store currently caching -- cgit v1.2.3 From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads Make fscache_relinquish_cookie() log a warning and wait if there are any outstanding reads left on the cookie it was given. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 9879183b55d8..e3d6d939d959 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -301,6 +301,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +#define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ }; extern struct fscache_cookie fscache_fsdef_index; -- cgit v1.2.3 From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Fix operation state management and accounting Fix the state management of internal fscache operations and the accounting of what operations are in what states. This is done by: (1) Give struct fscache_operation a enum variable that directly represents the state it's currently in, rather than spreading this knowledge over a bunch of flags, who's processing the operation at the moment and whether it is queued or not. This makes it easier to write assertions to check the state at various points and to prevent invalid state transitions. (2) Add an 'operation complete' state and supply a function to indicate the completion of an operation (fscache_op_complete()) and make things call it. The final call to fscache_put_operation() can then check that an op in the appropriate state (complete or cancelled). (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better govern the state of an object: (a) The ->n_ops is now the number of extant operations on the object and is now decremented by fscache_put_operation() only. (b) The ->n_in_progress is simply the number of objects that have been taken off of the object's pending queue for the purposes of being run. This is decremented by fscache_op_complete() only. (c) The ->n_exclusive is the number of exclusive ops that have been submitted and queued or are in progress. It is decremented by fscache_op_complete() and by fscache_cancel_op(). fscache_put_operation() and fscache_operation_gc() now no longer try to clean up ->n_exclusive and ->n_in_progress. That was leading to double decrements against fscache_cancel_op(). fscache_cancel_op() now no longer decrements ->n_ops. That was leading to double decrements against fscache_put_operation(). fscache_submit_exclusive_op() now decides whether it has to queue an op based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter will persist in being true even after all preceding operations have been cancelled or completed. Furthermore, if an object is active and there are runnable ops against it, there must be at least one op running. (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and provide a function to record completion of the pages as they complete. When n_pages reaches 0, the operation is deemed to be complete and fscache_op_complete() is called. Add calls to fscache_retrieval_complete() anywhere we've finished with a page we've been given to read or allocate for. This includes places where we just return pages to the netfs for reading from the server and where accessing the cache fails and we discard the proposed netfs page. The bugs in the unfixed state management manifest themselves as oopses like the following where the operation completion gets out of sync with return of the cookie by the netfs. This is possible because the cache unlocks and returns all the netfs pages before recording its completion - which means that there's nothing to stop the netfs discarding them and returning the cookie. FS-Cache: Cookie 'NFS.fh' still has outstanding reads ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! invalid opcode: 0000 [#1] SMP CPU 1 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090 /DG965RY RIP: 0010:[] [] __fscache_relinquish_cookie+0x170/0x343 [fscache] RSP: 0018:ffff8800368cfb00 EFLAGS: 00010282 RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000 RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000 R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98 R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370 FS: 0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040) Stack: ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91 Call Trace: [] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs] [] nfs_clear_inode+0x3c/0x41 [nfs] [] nfs4_evict_inode+0x2f/0x33 [nfs] [] evict+0xa1/0x15c [] dispose_list+0x2c/0x38 [] prune_icache_sb+0x28c/0x29b [] prune_super+0xd5/0x140 [] shrink_slab+0x102/0x1ab [] balance_pgdat+0x2f2/0x595 [] ? process_timeout+0xb/0xb [] kswapd+0x270/0x289 [] ? __init_waitqueue_head+0x46/0x46 [] ? balance_pgdat+0x595/0x595 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? gs_change+0xb/0xb Signed-off-by: David Howells --- include/linux/fscache-cache.h | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index e3d6d939d959..f5facd1d333f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq; typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +enum fscache_operation_state { + FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ + FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ + FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ + FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ + FSCACHE_OP_ST_COMPLETE, /* Op is complete */ + FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ + FSCACHE_OP_ST_DEAD /* Op is now dead */ +}; + struct fscache_operation { struct work_struct work; /* record for async ops */ struct list_head pend_link; /* link in object->pending_ops */ @@ -86,10 +96,10 @@ struct fscache_operation { #define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEAD 6 /* op is now dead */ -#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ +#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0x0070 /* flags to keep when repurposing an op */ + enum fscache_operation_state state; atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *); extern void fscache_put_operation(struct fscache_operation *); /** @@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, { INIT_WORK(&op->work, fscache_op_work_func); atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; op->release = release; @@ -138,6 +150,7 @@ struct fscache_retrieval { void *context; /* netfs read context (pinned) */ struct list_head to_do; /* list of things to be done by the backend */ unsigned long start_time; /* time at which retrieval started */ + unsigned n_pages; /* number of pages to be retrieved */ }; typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, @@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) fscache_enqueue_operation(&op->op); } +/** + * fscache_retrieval_complete - Record (partial) completion of a retrieval + * @op: The retrieval operation affected + * @n_pages: The number of pages to account for + */ +static inline void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages) +{ + op->n_pages -= n_pages; + if (op->n_pages <= 0) + fscache_op_complete(&op->op); +} + /** * fscache_put_retrieval - Drop a reference to a retrieval operation * @op: The retrieval operation affected + * @n_pages: The number of pages to account for * * Drop a reference to a retrieval operation. */ @@ -333,10 +360,10 @@ struct fscache_object { int debug_id; /* debugging ID */ int n_children; /* number of child objects */ - int n_ops; /* number of ops outstanding on object */ + int n_ops; /* number of extant ops on object */ int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued */ + int n_exclusive; /* number of exclusive ops queued or in progress */ atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ -- cgit v1.2.3 From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: FS-Cache: Provide proper invalidation Provide a proper invalidation method rather than relying on the netfs retiring the cookie it has and getting a new one. The problem with this is that isn't easy for the netfs to make sure that it has completed/cancelled all its outstanding storage and retrieval operations on the cookie it is retiring. Instead, have the cache provide an invalidation method that will cancel or wait for all currently outstanding operations before invalidating the cache, and will cause new operations to queue up behind that. Whilst invalidation is in progress, some requests will be rejected until the cache can stack a barrier on the operation queue to cause new operations to be deferred behind it. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 8 +++++++- include/linux/fscache.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f5facd1d333f..1e454ad7a832 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -254,6 +254,9 @@ struct fscache_cache_ops { /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); + /* Invalidate an object */ + void (*invalidate_object)(struct fscache_operation *op); + /* discard the resources pinned by an object and effect retirement if * necessary */ void (*drop_object)(struct fscache_object *object); @@ -329,6 +332,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ #define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ +#define FSCACHE_COOKIE_INVALIDATING 7 /* T if cookie is being invalidated */ }; extern struct fscache_cookie fscache_fsdef_index; @@ -345,6 +349,7 @@ struct fscache_object { /* active states */ FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_INVALIDATING, /* object is invalidating */ FSCACHE_OBJECT_UPDATING, /* object is updating */ /* terminal states */ @@ -378,7 +383,8 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ +#define FSCACHE_OBJECT_EV_INVALIDATE 7 /* T if cache requested object invalidation */ +#define FSCACHE_OBJECT_EVENTS_MASK 0xff /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4b6353543bf..7a086235da4b 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); extern void __fscache_update_cookie(struct fscache_cookie *); extern int __fscache_attr_changed(struct fscache_cookie *); +extern void __fscache_invalidate(struct fscache_cookie *); +extern void __fscache_wait_on_invalidate(struct fscache_cookie *); extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie) return -ENOBUFS; } +/** + * fscache_invalidate - Notify cache that an object needs invalidation + * @cookie: The cookie representing the cache object + * + * Notify the cache that an object is needs to be invalidated and that it + * should abort any retrievals or stores it is doing on the cache. The object + * is then marked non-caching until such time as the invalidation is complete. + * + * This can be called with spinlocks held. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_invalidate(cookie); +} + +/** + * fscache_wait_on_invalidate - Wait for invalidation to complete + * @cookie: The cookie representing the cache object + * + * Wait for the invalidation of an object to complete. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_invalidate(cookie); +} + /** * fscache_reserve_space - Reserve data space for a cached object * @cookie: The cookie representing the cache object -- cgit v1.2.3 From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: VFS: Make more complete truncate operation available to CacheFiles Make a more complete truncate operation available to CacheFiles (including security checks and suchlike) so that it can use this to clear invalidated cache files. Signed-off-by: David Howells Acked-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..017a15b707e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1999,6 +1999,7 @@ struct filename { bool separate; /* should "name" be freed? */ }; +extern long vfs_truncate(struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int do_fallocate(struct file *file, int mode, loff_t offset, -- cgit v1.2.3 From 36a02de5d7981435931d4608ee3e510b752e072b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:46 +0000 Subject: FS-Cache: Convert the object event ID #defines into an enum Convert the fscache_object event IDs from #defines into an enum. Also add an extra label to the enum to carry the event count and redefine the event mask in terms of that. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 1e454ad7a832..73e68c8d5df4 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -337,6 +337,23 @@ struct fscache_cookie { extern struct fscache_cookie fscache_fsdef_index; +/* + * Event list for fscache_object::{event_mask,events} + */ +enum { + FSCACHE_OBJECT_EV_REQUEUE, /* T if object should be requeued */ + FSCACHE_OBJECT_EV_UPDATE, /* T if object should be updated */ + FSCACHE_OBJECT_EV_INVALIDATE, /* T if cache requested object invalidation */ + FSCACHE_OBJECT_EV_CLEARED, /* T if accessors all gone */ + FSCACHE_OBJECT_EV_ERROR, /* T if fatal error occurred during processing */ + FSCACHE_OBJECT_EV_RELEASE, /* T if netfs requested object release */ + FSCACHE_OBJECT_EV_RETIRE, /* T if netfs requested object retirement */ + FSCACHE_OBJECT_EV_WITHDRAW, /* T if cache requested object withdrawal */ + NR_FSCACHE_OBJECT_EVENTS +}; + +#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1) + /* * on-disk cache file or index handle */ @@ -376,15 +393,6 @@ struct fscache_object { unsigned long event_mask; /* events this object is interested in */ unsigned long events; /* events to be processed by this object * (order is important - using fls) */ -#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ -#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ -#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ -#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ -#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ -#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ -#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EV_INVALIDATE 7 /* T if cache requested object invalidation */ -#define FSCACHE_OBJECT_EVENTS_MASK 0xff /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ -- cgit v1.2.3 From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Dec 2012 20:03:13 +0000 Subject: FS-Cache: Mark cancellation of in-progress operation Mark as cancelled an operation that is in progress rather than pending at the time it is cancelled, and call fscache_complete_op() to cancel an operation so that blocked ops can be started. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 73e68c8d5df4..5dfa0aa216b6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); -extern void fscache_op_complete(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); /** @@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op, { op->n_pages -= n_pages; if (op->n_pages <= 0) - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } /** -- cgit v1.2.3 From d30357f2f0ec0bfb67fd39f8f76d22d02d78631e Mon Sep 17 00:00:00 2001 From: Marco Stornelli Date: Sat, 15 Dec 2012 11:59:20 +0100 Subject: vfs: drop vmtruncate Removed vmtruncate Signed-off-by: Marco Stornelli Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..a0c5ba57ffc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1565,7 +1565,6 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - void (*truncate) (struct inode *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); -- cgit v1.2.3 From 7898575fc81bd707ce0844cb06874d48e39bbe09 Mon Sep 17 00:00:00 2001 From: Marco Stornelli Date: Sat, 15 Dec 2012 12:00:02 +0100 Subject: mm: drop vmtruncate Removed vmtruncate Signed-off-by: Marco Stornelli Signed-off-by: Al Viro --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f4f906190bd..63204078f72b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); -extern int vmtruncate(struct inode *inode, loff_t offset); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); -- cgit v1.2.3 From 471667391a92bf7bf2cd4ff31a3ad88e5dec934b Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 13 Dec 2012 12:22:39 +0100 Subject: vfs: Remove useless function prototypes Commit 8e22cc88d68ca1a46d7d582938f979eb640ed30f removes the (un)lock_super function definitions but forgets to remove their prototypes. Signed-off-by: Alessio Igor Bogani Signed-off-by: Al Viro --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a0c5ba57ffc5..05cd238ad941 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb) extern bool inode_owner_or_capable(const struct inode *inode); -/* not quite ready to be deprecated, but... */ -extern void lock_super(struct super_block *); -extern void unlock_super(struct super_block *); - /* * VFS helper functions.. */ -- cgit v1.2.3 From b9d6ba94b875192ef5e2dab92d72beea33b83c3d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 14:59:40 -0500 Subject: vfs: add a retry_estale helper function to handle retries on ESTALE This function is expected to be called from path-based syscalls to help them decide whether to try the lookup and call again in the event that they got an -ESTALE return back on an earier try. Currently, we only retry the call once on an ESTALE error, but in the event that we decide that that's not enough in the future, we should be able to change the logic in this helper without too much effort. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/namei.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 4bf19d8174ed..66542b644804 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) ((char *) name)[min(len, maxlen)] = '\0'; } +/** + * retry_estale - determine whether the caller should retry an operation + * @error: the error that would currently be returned + * @flags: flags being used for next lookup attempt + * + * Check to see if the error code was -ESTALE, and then determine whether + * to retry the call based on whether "flags" already has LOOKUP_REVAL set. + * + * Returns true if the caller should try the operation again. + */ +static inline bool +retry_estale(const long error, const unsigned int flags) +{ + return error == -ESTALE && !(flags & LOOKUP_REVAL); +} + #endif /* _LINUX_NAMEI_H */ -- cgit v1.2.3 From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:06 -0500 Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags passed in here are currently ignored. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/namei.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 66542b644804..e998c030061d 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, extern int kern_path(const char *, unsigned, struct path *); -extern struct dentry *kern_path_create(int, const char *, struct path *, int); -extern struct dentry *user_path_create(int, const char __user *, struct path *, int); +extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); +extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, -- cgit v1.2.3 From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 20 Dec 2012 15:05:16 -0800 Subject: exec: do not leave bprm->interp on stack If a series of scripts are executed, each triggering module loading via unprintable bytes in the script header, kernel stack contents can leak into the command line. Normally execution of binfmt_script and binfmt_misc happens recursively. However, when modules are enabled, and unprintable bytes exist in the bprm->buf, execution will restart after attempting to load matching binfmt modules. Unfortunately, the logic in binfmt_script and binfmt_misc does not expect to get restarted. They leave bprm->interp pointing to their local stack. This means on restart bprm->interp is left pointing into unused stack memory which can then be copied into the userspace argv areas. After additional study, it seems that both recursion and restart remains the desirable way to handle exec with scripts, misc, and modules. As such, we need to protect the changes to interp. This changes the logic to require allocation for any changes to the bprm->interp. To avoid adding a new kmalloc to every exec, the default value is left as-is. Only when passing through binfmt_script or binfmt_misc does an allocation take place. For a proof of concept, see DoTest.sh from: http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/ Signed-off-by: Kees Cook Cc: halfdog Cc: P J P Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a4c2b565c835..bdf3965f0a29 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); +extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); -- cgit v1.2.3 From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 20 Dec 2012 15:05:42 -0800 Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned divisors Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to change in DIV_ROUND_CLOSEST. Unfortunately, the C compiler converts divide operations with unsigned divisors to unsigned, even if the dividend is signed and negative (for example, -10 / 5U = 858993457). The C standard says "If one operand has unsigned int type, the other operand is converted to unsigned int", so the compiler is not to blame. As a result, DIV_ROUND_CLOSEST(0, 2U) and similar operations now return bad values, since the automatic conversion of expressions such as "0 - 2U/2" to unsigned was not taken into account. Fix by checking for the divisor variable type when deciding which operation to perform. This fixes DIV_ROUND_CLOSEST(0, 2U), but still returns bad values for negative dividends divided by unsigned divisors. Mark the latter case as unsupported. One observed effect of this problem is that the s2c_hwmon driver reports a value of 4198403 instead of 0 if the ADC reads 0. Other impact is unpredictable. Problem is seen if the divisor is an unsigned variable or constant and the dividend is less than (divisor/2). Signed-off-by: Guenter Roeck Reported-by: Juergen Beisert Tested-by: Juergen Beisert Cc: Jean Delvare Cc: [3.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d140e8fb075f..c566927efcbd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -77,13 +77,15 @@ /* * Divide positive or negative dividend by positive divisor and round - * to closest integer. Result is undefined for negative divisors. + * to closest integer. Result is undefined for negative divisors and + * for negative dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || (__x) > 0) ? \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || (__x) > 0) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ -- cgit v1.2.3 From d54eaa5a0fde0a202e4e91f200f818edcef15bee Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm: prepare to support WRITE SAME Allow targets to opt in to WRITE SAME support by setting 'num_write_same_requests' in the dm_target structure. A dm device will only advertise WRITE SAME support if all its targets and all its underlying devices support it. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 38d27a10aa5d..d1f6cd8486f2 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -205,6 +205,11 @@ struct dm_target { */ unsigned num_discard_requests; + /* + * The number of WRITE SAME requests that will be submitted to the target. + */ + unsigned num_write_same_requests; + /* target specific data */ void *private; -- cgit v1.2.3 From c0820cf5ad09522bdd9ff68e84841a09c9f339d8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:38 +0000 Subject: dm: introduce per_bio_data Introduce a field per_bio_data_size in struct dm_target. Targets can set this field in the constructor. If a target sets this field to a non-zero value, "per_bio_data_size" bytes of auxiliary data are allocated for each bio submitted to the target. These data can be used for any purpose by the target and help us improve performance by removing some per-target mempools. Per-bio data is accessed with dm_per_bio_data. The argument data_size must be the same as the value per_bio_data_size in dm_target. If the target has a pointer to per_bio_data, it can get a pointer to the bio with dm_bio_from_per_bio_data() function (data_size must be the same as the value passed to dm_per_bio_data). Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d1f6cd8486f2..6f0e73b4a80d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -210,6 +210,12 @@ struct dm_target { */ unsigned num_write_same_requests; + /* + * The minimum number of extra bytes allocated in each bio for the + * target to use. dm_per_bio_data returns the data location. + */ + unsigned per_bio_data_size; + /* target specific data */ void *private; @@ -246,6 +252,30 @@ struct dm_target_callbacks { int (*congested_fn) (struct dm_target_callbacks *, int); }; +/* + * For bio-based dm. + * One of these is allocated for each bio. + * This structure shouldn't be touched directly by target drivers. + * It is here so that we can inline dm_per_bio_data and + * dm_bio_from_per_bio_data + */ +struct dm_target_io { + struct dm_io *io; + struct dm_target *ti; + union map_info info; + struct bio clone; +}; + +static inline void *dm_per_bio_data(struct bio *bio, size_t data_size) +{ + return (char *)bio - offsetof(struct dm_target_io, clone) - data_size; +} + +static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) +{ + return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -- cgit v1.2.3 From ddbd658f6446a35e4d6ba84812fd71023320cae2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:39 +0000 Subject: dm: move target request nr to dm_target_io This patch moves target_request_nr from map_info to dm_target_io and makes it accessible with dm_bio_get_target_request_nr. This patch is a preparation for the next patch that removes map_info. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6f0e73b4a80d..eb96ef6fd8b7 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; union map_info { void *ptr; unsigned long long ll; - unsigned target_request_nr; }; /* @@ -193,20 +192,21 @@ struct dm_target { * A number of zero-length barrier requests that will be submitted * to the target for the purpose of flushing cache. * - * The request number will be placed in union map_info->target_request_nr. + * The request number can be accessed with dm_bio_get_target_request_nr. * It is a responsibility of the target driver to remap these requests * to the real underlying devices. */ unsigned num_flush_requests; /* - * The number of discard requests that will be submitted to the - * target. map_info->request_nr is used just like num_flush_requests. + * The number of discard requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_discard_requests; /* * The number of WRITE SAME requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_write_same_requests; @@ -263,6 +263,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; + unsigned target_request_nr; struct bio clone; }; @@ -276,6 +277,11 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); } +static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +{ + return container_of(bio, struct dm_target_io, clone)->target_request_nr; +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -- cgit v1.2.3 From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:41 +0000 Subject: dm: remove map_info This patch removes map_info from bio-based device mapper targets. map_info is still used for request-based targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index eb96ef6fd8b7..bf6afa2fc432 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -45,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); * = 1: simple remap complete * = 2: The target wants to push back the io */ -typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, - union map_info *map_context); +typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, union map_info *map_context); @@ -59,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int error, - union map_info *map_context); + struct bio *bio, int error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); -- cgit v1.2.3 From 30e6c9fa93cf3dbc7cc6df1d748ad25e4264545a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Dec 2012 17:25:08 +0000 Subject: net: devnet_rename_seq should be a seqcount Using a seqlock for devnet_rename_seq is not a good idea, as device_rename() can sleep. As we hold RTNL, we dont need a protection for writers, and only need a seqcount so that readers can catch a change done by a writer. Bug added in commit c91f6df2db4972d3 (sockopt: Change getsockopt() of SO_BINDTODEVICE to return an interface name) Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Brian Haley Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02e0f6b156c3..c599e4782d45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1576,7 +1576,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern rwlock_t dev_base_lock; /* Device list lock */ -extern seqlock_t devnet_rename_seq; /* Device rename lock */ +extern seqcount_t devnet_rename_seq; /* Device rename seq */ #define for_each_netdev(net, d) \ -- cgit v1.2.3 From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:29:52 -0500 Subject: ext4: fix deadlock in journal_unmap_buffer() We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page stradding i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern void jbd2_journal_invalidatepage(journal_t *, +extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_stop(handle_t *); -- cgit v1.2.3 From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 24 Dec 2012 11:14:58 -0700 Subject: namei.h: include errno.h This solves: In file included from fs/ext3/symlink.c:20:0: include/linux/namei.h: In function 'retry_estale': include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function) Signed-off-by: Stephen Warren Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index e998c030061d..5a5ff57ceed4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -2,6 +2,7 @@ #define _LINUX_NAMEI_H #include +#include #include #include -- cgit v1.2.3 From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Dec 2012 20:27:12 -0800 Subject: pidns: Stop pid allocation when init dies Oleg pointed out that in a pid namespace the sequence. - pid 1 becomes a zombie - setns(thepidns), fork,... - reaping pid 1. - The injected processes exiting. Can lead to processes attempting access their child reaper and instead following a stale pointer. That waitpid for init can return before all of the processes in the pid namespace have exited is also unfortunate. Avoid these problems by disabling the allocation of new pids in a pid namespace when init dies, instead of when the last process in a pid namespace is reaped. Pointed-out-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: "Eric W. Biederman" --- include/linux/pid.h | 1 + include/linux/pid_namespace.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index b152d44fb181..2381c973d897 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); +extern void disable_pid_allocation(struct pid_namespace *ns); /* * ns_of_pid() returns the pid namespace in which the specified pid was diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index bf285999273a..215e5e3dda10 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,7 +21,7 @@ struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; - int nr_hashed; + unsigned int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -42,6 +42,8 @@ struct pid_namespace { extern struct pid_namespace init_pid_ns; +#define PIDNS_HASH_ADDING (1U << 31) + #ifdef CONFIG_PID_NS static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { -- cgit v1.2.3 From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 1 Dec 2012 12:37:20 -0800 Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz Otherwise it fails like this on cards like the Transcend 16GB SDHC card: mmc0: new SDHC card at address b368 mmcblk0: mmc0:b368 SDC 15.0 GiB mmcblk0: error -110 sending status command, retrying mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0 Tested on my Lenovo x200 laptop. [bhelgaas: changelog] Signed-off-by: Andy Lutomirski Signed-off-by: Bjorn Helgaas Acked-by: Chris Ball CC: Manoj Iyer CC: stable@vger.kernel.org --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f8447376ddb..0eb65796bcb9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1568,6 +1568,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE822 0xe822 #define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.3 From ad4b3fb7ff9940bcdb1e4cd62bd189d10fa636ba Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 21 Dec 2012 13:03:50 -0500 Subject: mm: Fix PageHead when !CONFIG_PAGEFLAGS_EXTENDED Unfortunately with !CONFIG_PAGEFLAGS_EXTENDED, (!PageHead) is false, and (PageHead) is true, for tail pages. If this is indeed the intended behavior, which I doubt because it breaks cache cleaning on some ARM systems, then the nomenclature is highly problematic. This patch makes sure PageHead is only true for head pages and PageTail is only true for tail pages, and neither is true for non-compound pages. [ This buglet seems ancient - seems to have been introduced back in Apr 2008 in commit 6a1e7f777f61: "pageflags: convert to the use of new macros". And the reason nobody noticed is because the PageHead() tests are almost all about just sanity-checking, and only used on pages that are actual page heads. The fact that the old code returned true for tail pages too was thus not really noticeable. - Linus ] Signed-off-by: Christoffer Dall Acked-by: Andrea Arcangeli Cc: Andrew Morton Cc: Will Deacon Cc: Steve Capper Cc: Christoph Lameter Cc: stable@kernel.org # 2.6.26+ Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5d13841604e..70473da47b3f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -362,7 +362,7 @@ static inline void ClearPageCompound(struct page *page) * pages on the LRU and/or pagecache. */ TESTPAGEFLAG(Compound, compound) -__PAGEFLAG(Head, compound) +__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) /* * PG_reclaim is used in combination with PG_compound to mark the @@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound) * PG_compound & PG_reclaim => Tail page * PG_compound & ~PG_reclaim => Head page */ +#define PG_head_mask ((1L << PG_compound)) #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) +static inline int PageHead(struct page *page) +{ + return ((page->flags & PG_head_tail_mask) == PG_head_mask); +} + static inline int PageTail(struct page *page) { return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); -- cgit v1.2.3 From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Jan 2013 02:04:23 -0800 Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str Remove the unused argument (formerly no_context) from mpol_parse_str() and from mpol_to_str(). Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 9adc270de7ef..92bc9988a180 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, #ifdef CONFIG_TMPFS -extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif -extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context); +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -296,15 +295,13 @@ static inline void check_highest_zone(int k) } #ifdef CONFIG_TMPFS -static inline int mpol_parse_str(char *str, struct mempolicy **mpol, - int no_context) +static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif -static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context) +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { return 0; } -- cgit v1.2.3 From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 21 Dec 2012 23:10:25 +0000 Subject: mm: mempolicy: Convert shared_policy mutex to spinlock Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vunerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. Signed-off-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 92bc9988a180..0d7df39a5885 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -123,7 +123,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - struct mutex mutex; + spinlock_t lock; }; void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -- cgit v1.2.3 From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Jan 2013 15:12:55 +0000 Subject: UAPI: Remove empty Kbuild files Empty files can get deleted by the patch program, so remove empty Kbuild files and their links from the parent Kbuilds. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 5 ----- include/linux/hdlc/Kbuild | 0 include/linux/hsi/Kbuild | 0 include/linux/raid/Kbuild | 0 include/linux/usb/Kbuild | 0 5 files changed, 5 deletions(-) delete mode 100644 include/linux/Kbuild delete mode 100644 include/linux/hdlc/Kbuild delete mode 100644 include/linux/hsi/Kbuild delete mode 100644 include/linux/raid/Kbuild delete mode 100644 include/linux/usb/Kbuild (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild deleted file mode 100644 index 7fe2dae251e5..000000000000 --- a/include/linux/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -header-y += dvb/ -header-y += hdlc/ -header-y += hsi/ -header-y += raid/ -header-y += usb/ diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.3 From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:02:05 -0800 Subject: pstore: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from the pstore filesystem. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 098d2a838296..cb6ab5feab67 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -46,9 +46,8 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size); +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:12:59 -0800 Subject: Drivers: bcma: remove __dev* attributes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit, __devexit_p, and __devexit from these drivers. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Cc: "Rafał Miłecki" Signed-off-by: Greg Kroah-Hartman --- include/linux/bcma/bcma_driver_gmac_cmn.h | 2 +- include/linux/bcma/bcma_driver_pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index def894b83b0d..4dd1f33e36a2 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); +extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); #else static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } #endif diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 41da581e1612..c48d98d27b77 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -214,7 +214,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc); +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend); -- cgit v1.2.3 From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:15:49 -0800 Subject: include: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from some include files that were previously missed. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Signed-off-by: Greg Kroah-Hartman --- include/linux/ata_platform.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index fe9989636b62..b9fde17f767c 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -15,12 +15,12 @@ struct pata_platform_info { unsigned int irq_flags; }; -extern int __devinit __pata_platform_probe(struct device *dev, - struct resource *io_res, - struct resource *ctl_res, - struct resource *irq_res, - unsigned int ioport_shift, - int __pio_mask); +extern int __pata_platform_probe(struct device *dev, + struct resource *io_res, + struct resource *ctl_res, + struct resource *irq_res, + unsigned int ioport_shift, + int __pio_mask); /* * Marvell SATA private data -- cgit v1.2.3 From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:50 -0800 Subject: ipc: add sysctl to specify desired next object id Add 3 new variables and sysctls to tune them (by one "next_id" variable for messages, semaphores and shared memory respectively). This variable can be used to set desired id for next allocated IPC object. By default it's equal to -1 and old behaviour is preserved. If this variable is non-negative, then desired idr will be extracted from it and used as a start value to search for free IDR slot. Notes: 1) this patch doesn't guarantee that the new object will have desired id. So it's up to user space how to handle new object with wrong id. 2) After a sucessful id allocation attempt, "next_id" will be set back to -1 (if it was non-negative). [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index fe771978e877..ae221a7b5092 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -24,6 +24,7 @@ struct ipc_ids { unsigned short seq_max; struct rw_semaphore rw_mutex; struct idr ipcs_idr; + int next_id; }; struct ipc_namespace { -- cgit v1.2.3 From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:52 -0800 Subject: ipc: message queue receive cleanup Move all message related manipulation into one function msg_fill(). Actually, two functions because of the compat one. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 7a4b9e97d29a..fc5743a554e6 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -34,7 +34,8 @@ struct msg_queue { /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg); +extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.3 From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:56 -0800 Subject: selftests: IPC message queue copy feature test This test can be used to check wheither kernel supports IPC message queue copy and restore features (required by CRIU project). Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index fc5743a554e6..391af8d11cce 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, - long (*msg_fill)(void __user *, struct msg_msg *, size_t)); + long (*msg_fill)(void __user *, struct msg_msg *, + size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.3 From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 4 Jan 2013 15:35:08 -0800 Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever problem") added an isolated pageblocks counter (nr_pageblock_isolate in struct zone) and used it to adjust free pages counter in zone_watermark_ok_safe() to prevent kswapd looping forever problem. Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages") fixed accounting of isolated pages in global free pages counter. It made the previous zone_watermark_ok_safe() fix unnecessary and potentially harmful (cause now isolated pages may be accounted twice making free pages counter incorrect). This patch removes the special isolated pageblocks counter altogether which fixes zone_watermark_ok_safe() free pages check. Reported-by: Tomasz Stanislawski Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Aaditya Kumar Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4bec5be82cab..73b64a38b984 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -503,14 +503,6 @@ struct zone { * rarely used fields: */ const char *name; -#ifdef CONFIG_MEMORY_ISOLATION - /* - * the number of MIGRATE_ISOLATE *pageblock*. - * We need this for free page counting. Look at zone_watermark_ok_safe. - * It's protected by zone->lock - */ - int nr_pageblock_isolate; -#endif } ____cacheline_internodealigned_in_smp; typedef enum { -- cgit v1.2.3 From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 9 Jan 2013 14:16:30 +0000 Subject: cred: Remove tgcred pointer from struct cred Commit 3a50597de863 ("KEYS: Make the session and process keyrings per-thread") removed the definition of the thread_group_cred structure, but left a now unused pointer in struct cred. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index abb2cd50f6b2..04421e825365 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -128,7 +128,6 @@ struct cred { struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ - struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ -- cgit v1.2.3 From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Jan 2013 10:57:01 -0800 Subject: Remove __dev* markings from init.h Now that all in-kernel users of __dev* are gone, let's remove them from init.h to keep them from popping up again and again. Thanks to Bill Pemberton for doing all of the hard work to make removal of this possible. Cc: Bill Pemberton Cc: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index a799273714ac..10ed4f436458 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,14 +93,6 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* Used for HOTPLUG, but that is always enabled now, so just make them noops */ -#define __devinit -#define __devinitdata -#define __devinitconst -#define __devexit -#define __devexitdata -#define __devexitconst - /* Used for HOTPLUG_CPU */ #define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) @@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ -/* Functions marked as __devexit may be discarded at kernel link time, depending - on config options. Newer versions of binutils detect references from - retained sections to discarded sections and flag an error. Pointers to - __devexit functions must use __devexit_p(function_name), the wrapper will - insert either the function_name or NULL, depending on the config options. - */ -#if defined(MODULE) || defined(CONFIG_HOTPLUG) -#define __devexit_p(x) x -#else -#define __devexit_p(x) NULL -#endif - #ifdef MODULE #define __exit_p(x) x #else -- cgit v1.2.3 From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 11 Jan 2013 14:31:36 -0800 Subject: lib: cpu_rmap: avoid flushing all workqueues In some cases, free_irq_cpu_rmap() is called while holding a lock (eg rtnl). This can lead to deadlocks, because it invokes flush_scheduled_work() which ends up waiting for whole system workqueue to flush, but some pending works might try to acquire the lock we are already holding. This commit uses reference-counting to replace irq_run_affinity_notifiers(). It also removes irq_run_affinity_notifiers() altogether. [akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()] Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Acked-by: Eric Dumazet Reviewed-by: Josh Triplett Cc: "David S. Miller" Cc: Or Gerlitz Acked-by: Amir Vadai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu_rmap.h | 13 ++++--------- include/linux/interrupt.h | 5 ----- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index ac3bbb5b9502..1739510d8994 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -13,9 +13,11 @@ #include #include #include +#include /** * struct cpu_rmap - CPU affinity reverse-map + * @refcount: kref for object * @size: Number of objects to be reverse-mapped * @used: Number of objects added * @obj: Pointer to array of object pointers @@ -23,6 +25,7 @@ * based on affinity masks */ struct cpu_rmap { + struct kref refcount; u16 size, used; void **obj; struct { @@ -33,15 +36,7 @@ struct cpu_rmap { #define CPU_RMAP_DIST_INF 0xffff extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); - -/** - * free_cpu_rmap - free CPU affinity reverse-map - * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL - */ -static inline void free_cpu_rmap(struct cpu_rmap *rmap) -{ - kfree(rmap); -} +extern int cpu_rmap_put(struct cpu_rmap *rmap); extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5e4e6170f43a..5fa5afeeb759 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -268,11 +268,6 @@ struct irq_affinity_notify { extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -static inline void irq_run_affinity_notifiers(void) -{ - flush_scheduled_work(); -} - #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) -- cgit v1.2.3 From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 11 Jan 2013 14:31:56 -0800 Subject: lockdep, rwsem: provide down_write_nest_lock() down_write_nest_lock() provides a means to annotate locking scenario where an outer lock is guaranteed to serialize the order nested locks are being acquired. This is analogoue to already existing mutex_lock_nest_lock() and spin_lock_nest_lock(). Signed-off-by: Jiri Kosina Cc: Rik van Riel Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Mel Gorman Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 +++ include/linux/rwsem.h | 9 +++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 00e46376e28f..2bca44b0893c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, NULL, i) # else # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, NULL, i) # endif # define rwsem_release(l, n, i) lock_release(l, n, i) #else # define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_nest(l, s, t, n, i) do { } while (0) # define rwsem_acquire_read(l, s, t, i) do { } while (0) # define rwsem_release(l, n, i) do { } while (0) #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 54bd7cd7ecbd..413cc11e414a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem); */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); + +# define down_write_nest_lock(sem, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ +} while (0); + #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_write_nest_lock(sem, nest_lock) down_read(sem) # define down_write_nested(sem, subclass) down_write(sem) #endif -- cgit v1.2.3 From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Jan 2013 14:32:05 -0800 Subject: audit: create explicit AUDIT_SECCOMP event type The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1 could only kill a process. While we still want to make sure an audit record is forced on a kill, this should use a separate record type since seccomp mode 2 introduces other behaviors. In the case of "handled" behaviors (process wasn't killed), only emit a record if the process is under inspection. This change also fixes userspace examination of seccomp audit events, since it was considered malformed due to missing fields of the AUDIT_ANOM_ABEND event type. Signed-off-by: Kees Cook Cc: Al Viro Cc: Eric Paris Cc: Jeff Layton Cc: "Eric W. Biederman" Cc: Julien Tinnes Acked-by: Will Drewry Acked-by: Steve Grubb Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index bce729afbcf9..9d5104d7aba9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -157,7 +157,8 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (unlikely(!audit_dummy_context())) + /* Force a record to be reported if a signal was delivered. */ + if (signr || unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } -- cgit v1.2.3 From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 11 Jan 2013 14:32:13 -0800 Subject: linux/audit.h: move ptrace.h include to kernel header While the kernel internals want pt_regs (and so it includes linux/ptrace.h), the user version of audit.h does not need it. So move the include out of the uapi version. This avoids issues where people want the audit defines and userland ptrace api. Including both the kernel ptrace and the userland ptrace headers can easily lead to failure. Signed-off-by: Mike Frysinger Cc: Eric Paris Cc: Al Viro Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d5104d7aba9..5a6d718adf34 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -24,6 +24,7 @@ #define _LINUX_AUDIT_H_ #include +#include #include struct audit_sig_info { -- cgit v1.2.3 From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 11 Jan 2013 14:32:16 -0800 Subject: mm: compaction: partially revert capture of suitable high-order page Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages leading to a poor interaction with swap-over-NFS support causing the packets to be dropped. However, I identified a few more problems with the patch including the fact that it can increase contention on zone->lock in some cases which could result in async direct compaction being aborted early. In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was showing to improve allocation success rates at the time, the benefit is marginal given the relative complexity and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested. This patch partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). Reported-and-tested-by: Eric Wong Tested-by: Eric Dumazet Cc: Signed-off-by: Mel Gorman Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- include/linux/mm.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6ecb6dc2f303..cc7bddeaf553 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended, struct page **page); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 63204078f72b..66e2f7c61e5c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); int split_free_page(struct page *page); -int capture_free_page(struct page *page, int alloc_order, int migratetype); /* * Compound pages have a destructor function. Provide a -- cgit v1.2.3 From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 11 Jan 2013 14:32:20 -0800 Subject: lib/rbtree.c: avoid the use of non-static __always_inline lib/rbtree.c declared __rb_erase_color() as __always_inline void, and then exported it with EXPORT_SYMBOL. This was because __rb_erase_color() must be exported for augmented rbtree users, but it must also be inlined into rb_erase() so that the dummy callback can get optimized out of that call site. (Actually with a modern compiler, none of the dummy callback functions should even be generated as separate text functions). The above usage is legal C, but it was unusual enough for some compilers to warn about it. This change makes things more explicit, with a static __always_inline ____rb_erase_color function for use in rb_erase(), and a separate non-inline __rb_erase_color function for use in rb_erase_augmented call sites. Signed-off-by: Michel Lespinasse Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 2ac60c9cf644..fea49b5da12a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); -static __always_inline void -rb_erase_augmented(struct rb_node *node, struct rb_root *root, - const struct rb_augment_callbacks *augment) +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right, *tmp = node->rb_left; struct rb_node *parent, *rebalance; @@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root, } augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } -- cgit v1.2.3 From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 10 Jan 2013 23:19:10 +0000 Subject: net, wireless: overwrite default_ethtool_ops Since: commit 2c60db037034d27f8c636403355d52872da92f81 Author: Eric Dumazet Date: Sun Sep 16 09:17:26 2012 +0000 net: provide a default dev->ethtool_ops wireless core does not correctly assign ethtool_ops. After alloc_netdev*() call, some cfg80211 drivers provide they own ethtool_ops, but some do not. For them, wireless core provide generic cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call: if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211 drivers without custom ethtool_ops), but points to &default_ethtool_ops. In order to fix the problem, provide function which will overwrite default_ethtool_ops and use it by wireless core. Signed-off-by: Stanislaw Gruszka Acked-by: Johannes Berg Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c599e4782d45..9ef07d0868b6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,9 @@ struct wireless_dev; #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) +extern void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops); + /* hardware address assignment types */ #define NET_ADDR_PERM 0 /* address is permanent (default) */ #define NET_ADDR_RANDOM 1 /* address is generated randomly */ -- cgit v1.2.3 From 0d21b0e3477395e7ff2acc269f15df6e6a8d356d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 12 Jan 2013 11:38:44 +1030 Subject: module: add new state MODULE_STATE_UNFORMED. You should never look at such a module, so it's excised from all paths which traverse the modules list. We add the state at the end, to avoid gratuitous ABI break (ksplice). Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7760c6d344a3..1375ee3f03aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -199,11 +199,11 @@ struct module_use { struct module *source, *target; }; -enum module_state -{ - MODULE_STATE_LIVE, - MODULE_STATE_COMING, - MODULE_STATE_GOING, +enum module_state { + MODULE_STATE_LIVE, /* Normal state. */ + MODULE_STATE_COMING, /* Full formed, running module_init. */ + MODULE_STATE_GOING, /* Going away. */ + MODULE_STATE_UNFORMED, /* Still setting it up. */ }; /** -- cgit v1.2.3 From 803739d25c2343da6d2f95eebdcbc08bf67097d4 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Mon, 17 Dec 2012 23:18:59 +0800 Subject: [libata] replace sata_settings with devslp_timing NCQ capability was used to check availability of SATA Settings page from Identify Device Data Log, which contains DevSlp timing variables. It does not work on some HDDs and leads to error messages. IDENTIFY word 78 bit 5(Hardware Feature Control) can't work either because it is only the sufficient condition of Identify Device data log, not the necessary condition. This patch replaced ata_device->sata_settings with ->devslp_timing to only save DevSlp timing variables(8 bytes), instead of the whole SATA Settings page(512 bytes). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=51881 Reported-by: Borislav Petkov Signed-off-by: Shane Huang Cc: stable@vger.kernel.org Signed-off-by: Jeff Garzik --- include/linux/ata.h | 8 +++++--- include/linux/libata.h | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 408da9502177..8f7a3d68371a 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -297,10 +297,12 @@ enum { ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_SATA_ID_DEV_DATA = 0x30, ATA_LOG_SATA_SETTINGS = 0x08, - ATA_LOG_DEVSLP_MDAT = 0x30, + ATA_LOG_DEVSLP_OFFSET = 0x30, + ATA_LOG_DEVSLP_SIZE = 0x08, + ATA_LOG_DEVSLP_MDAT = 0x00, ATA_LOG_DEVSLP_MDAT_MASK = 0x1F, - ATA_LOG_DEVSLP_DETO = 0x31, - ATA_LOG_DEVSLP_VALID = 0x37, + ATA_LOG_DEVSLP_DETO = 0x01, + ATA_LOG_DEVSLP_VALID = 0x07, ATA_LOG_DEVSLP_VALID_MASK = 0x80, /* READ/WRITE LONG (obsolete) */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 83ba0ab2c915..649e5f86b5f0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -652,8 +652,8 @@ struct ata_device { u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ }; - /* Identify Device Data Log (30h), SATA Settings (page 08h) */ - u8 sata_settings[ATA_SECT_SIZE]; + /* DEVSLP Timing Variables from Identify Device Data Log */ + u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; /* error history */ int spdn_cnt; -- cgit v1.2.3 From 8aef33a7cf40ca9da188e8578b2abe7267a38c52 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 15 Jan 2013 14:18:04 +0100 Subject: cpuidle: remove the power_specified field in the driver We realized that the power usage field is never filled and when it is filled for tegra, the power_specified flag is not set causing all of these values to be reset when the driver is initialized with set_power_state(). However, the power_specified flag can be simply removed under the assumption that the states are always backward sorted, which is the case with the current code. This change allows the menu governor select function and the cpuidle_play_dead() to be simplified. Moreover, the set_power_states() function can removed as it does not make sense any more. Drop the power_specified flag from struct cpuidle_driver and make the related changes as described above. As a consequence, this also fixes the bug where on the dynamic C-states system, the power fields are not initialized. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=42870 References: https://bugzilla.kernel.org/show_bug.cgi?id=43349 References: https://lkml.org/lkml/2012/10/16/518 Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3711b34dc4f9..24cd1037b6d6 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -126,9 +126,9 @@ struct cpuidle_driver { struct module *owner; int refcnt; - unsigned int power_specified:1; /* set to 1 to use the core cpuidle time keeping (for all states). */ unsigned int en_core_tk_irqen:1; + /* states array must be ordered in decreasing power consumption */ struct cpuidle_state states[CPUIDLE_STATE_MAX]; int state_count; int safe_state_index; -- cgit v1.2.3 From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jan 2013 18:52:51 -0800 Subject: module, async: async_synchronize_full() on module init iff async is used If the default iosched is built as module, the kernel may deadlock while trying to load the iosched module on device probe if the probing was running off async. This is because async_synchronize_full() at the end of module init ends up waiting for the async job which initiated the module loading. async A modprobe 1. finds a device 2. registers the block device 3. request_module(default iosched) 4. modprobe in userland 5. load and init module 6. async_synchronize_full() Async A waits for modprobe to finish in request_module() and modprobe waits for async A to finish in async_synchronize_full(). Because there's no easy to track dependency once control goes out to userland, implementing properly nested flushing is difficult. For now, make module init perform async_synchronize_full() iff module init has queued async jobs as suggested by Linus. This avoids the described deadlock because iosched module doesn't use async and thus wouldn't invoke async_synchronize_full(). This is hacky and incomplete. It will deadlock if async module loading nests; however, this works around the known problem case and seems to be the best of bad options. For more details, please refer to the following thread. http://thread.gmane.org/gmane.linux.kernel/1420814 Signed-off-by: Tejun Heo Reported-by: Alex Riesen Tested-by: Ming Lei Tested-by: Alex Riesen Cc: Arjan van de Ven Cc: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 206bb089c06b..6fc8f45de4e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -- cgit v1.2.3 From e65b9ad222c280c031bc8d3642cc38dd3026fe06 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 15 Jan 2013 20:12:37 +0100 Subject: lockdep, rwsem: fix down_write_nest_lock() if !CONFIG_DEBUG_LOCK_ALLOC Commit 1b963c81b145 ("lockdep, rwsem: provide down_write_nest_lock()") contains a bug in a codepath when CONFIG_DEBUG_LOCK_ALLOC is disabled, which causes down_read() to be called instead of down_write() by mistake on such configurations. Fix that. Reported-and-tested-by: Andrew Clayton Reported-and-tested-by: Zlatko Calusic Signed-off-by: Jiri Kosina Reviewed-by: Rik van Riel Signed-off-by: Linus Torvalds --- include/linux/rwsem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 413cc11e414a..8da67d625e13 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -135,7 +135,7 @@ do { \ #else # define down_read_nested(sem, subclass) down_read(sem) -# define down_write_nest_lock(sem, nest_lock) down_read(sem) +# define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) #endif -- cgit v1.2.3 From edea0d03ee5f0ae0051b6adb6681ebdf976b1ca4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 20 Jan 2013 20:25:47 +0100 Subject: ia64: kill thread_matches(), unexport ptrace_check_attach() The ia64 function "thread_matches()" has no users since commit e868a55c2a8c ("[IA64] remove find_thread_for_addr()"). Remove it. This allows us to make ptrace_check_attach() static to kernel/ptrace.c, which is good since we'll need to change the semantics of it and fix up all the callers. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1693775ecfe8..89573a33ab3c 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -45,7 +45,6 @@ extern long arch_ptrace(struct task_struct *child, long request, extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern void ptrace_disable(struct task_struct *); -extern int ptrace_check_attach(struct task_struct *task, bool ignore_state); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); -- cgit v1.2.3 From 910ffdb18a6408e14febbb6e4b6840fd2c928c82 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Jan 2013 20:47:41 +0100 Subject: ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up() Cleanup and preparation for the next change. signal_wake_up(resume => true) is overused. None of ptrace/jctl callers actually want to wakeup a TASK_WAKEKILL task, but they can't specify the necessary mask. Turn signal_wake_up() into signal_wake_up_state(state), reintroduce signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up() which adds __TASK_TRACED. This way ptrace_signal_wake_up() can work "inside" ptrace_request() even if the tracee doesn't have the TASK_WAKEKILL bit set. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6fc8f45de4e9..d2112477ff5e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2714,7 +2714,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig) extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); -extern void signal_wake_up(struct task_struct *t, int resume_stopped); +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} /* * Wrappers for p->thread_info->cpu access. No-op on UP. -- cgit v1.2.3