From 4b8164b91d9fdff4dbac0a742d076bdff7fda21b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 31 Jan 2015 20:08:47 -0500 Subject: new helper: dup_iter() Copy iter and kmemdup the underlying array for the copy. Returns a pointer to result of kmemdup() to be kfree()'d later. Signed-off-by: Al Viro --- include/linux/uio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 07a022641996..71880299ed48 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); + static inline size_t iov_iter_count(struct iov_iter *i) { return i->count; -- cgit v1.2.3 From 09ee96b21456883e108c3b00597bb37ec512151b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 26 Feb 2015 11:41:28 -0500 Subject: dm snapshot: suspend merging snapshot when doing exception handover The "dm snapshot: suspend origin when doing exception handover" commit fixed a exception store handover bug associated with pending exceptions to the "snapshot-origin" target. However, a similar problem exists in snapshot merging. When snapshot merging is in progress, we use the target "snapshot-merge" instead of "snapshot-origin". Consequently, during exception store handover, we must find the snapshot-merge target and suspend its associated mapped_device. To avoid lockdep warnings, the target must be suspended and resumed without holding _origins_lock. Introduce a dm_hold() function that grabs a reference on a mapped_device, but unlike dm_get(), it doesn't crash if the device has the DMF_FREEING flag set, it returns an error in this case. In snapshot_resume() we grab the reference to the origin device using dm_hold() while holding _origins_lock (_origins_lock guarantees that the device won't disappear). Then we release _origins_lock, suspend the device and grab _origins_lock again. NOTE to stable@ people: When backporting to kernels 3.18 and older, use dm_internal_suspend and dm_internal_resume instead of dm_internal_suspend_fast and dm_internal_resume_fast. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 2646aed1d3fe..fd23978d93fe 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -375,6 +375,7 @@ int dm_create(int minor, struct mapped_device **md); */ struct mapped_device *dm_get_md(dev_t dev); void dm_get(struct mapped_device *md); +int dm_hold(struct mapped_device *md); void dm_put(struct mapped_device *md); /* -- cgit v1.2.3 From c6331ba3d2d68758f36dbc3e09e648d312c24d97 Mon Sep 17 00:00:00 2001 From: Marcin Bis Date: Sun, 1 Mar 2015 13:49:32 +0100 Subject: spi: fix a typo in comment. alway -> always Signed-off-by: Marcin Bis Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ed9489d893a4..856d34dde79b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -649,7 +649,7 @@ struct spi_transfer { * sequence completes. On some systems, many such sequences can execute as * as single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages - * sent to a given spi_device are alway executed in FIFO order. + * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. -- cgit v1.2.3 From 40eeb111d7c88bfbc38e1dfe330bc4cec05e0806 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 5 Mar 2015 10:08:14 +0100 Subject: Revert "pinctrl: consumer: use correct retval for placeholder functions" This reverts commit 5a7d2efdd93f6c4bb6cd3d5df3d2f5611c9b87ac. As per discussion on the mailing list, this is not the right thing to do. NULL cookies are valid in the stubs. Reported-by: Wolfram Sang Signed-off-by: Linus Walleij --- include/linux/pinctrl/consumer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 72c0415d6c21..18eccefea06e 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -82,7 +82,7 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio) static inline struct pinctrl * __must_check pinctrl_get(struct device *dev) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void pinctrl_put(struct pinctrl *p) @@ -93,7 +93,7 @@ static inline struct pinctrl_state * __must_check pinctrl_lookup_state( struct pinctrl *p, const char *name) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline int pinctrl_select_state(struct pinctrl *p, @@ -104,7 +104,7 @@ static inline int pinctrl_select_state(struct pinctrl *p, static inline struct pinctrl * __must_check devm_pinctrl_get(struct device *dev) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void devm_pinctrl_put(struct pinctrl *p) -- cgit v1.2.3 From 8603e1b30027f943cc9c1eef2b291d42c3347af1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Mar 2015 08:04:13 -0500 Subject: workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s for PREEMPT_NONE cancel[_delayed]_work_sync() are implemented using __cancel_work_timer() which grabs the PENDING bit using try_to_grab_pending() and then flushes the work item with PENDING set to prevent the on-going execution of the work item from requeueing itself. try_to_grab_pending() can always grab PENDING bit without blocking except when someone else is doing the above flushing during cancelation. In that case, try_to_grab_pending() returns -ENOENT. In this case, __cancel_work_timer() currently invokes flush_work(). The assumption is that the completion of the work item is what the other canceling task would be waiting for too and thus waiting for the same condition and retrying should allow forward progress without excessive busy looping Unfortunately, this doesn't work if preemption is disabled or the latter task has real time priority. Let's say task A just got woken up from flush_work() by the completion of the target work item. If, before task A starts executing, task B gets scheduled and invokes __cancel_work_timer() on the same work item, its try_to_grab_pending() will return -ENOENT as the work item is still being canceled by task A and flush_work() will also immediately return false as the work item is no longer executing. This puts task B in a busy loop possibly preventing task A from executing and clearing the canceling state on the work item leading to a hang. task A task B worker executing work __cancel_work_timer() try_to_grab_pending() set work CANCELING flush_work() block for work completion completion, wakes up A __cancel_work_timer() while (forever) { try_to_grab_pending() -ENOENT as work is being canceled flush_work() false as work is no longer executing } This patch removes the possible hang by updating __cancel_work_timer() to explicitly wait for clearing of CANCELING rather than invoking flush_work() after try_to_grab_pending() fails with -ENOENT. Link: http://lkml.kernel.org/g/20150206171156.GA8942@axis.com v3: bit_waitqueue() can't be used for work items defined in vmalloc area. Switched to custom wake function which matches the target work item and exclusive wait and wakeup. v2: v1 used wake_up() on bit_waitqueue() which leads to NULL deref if the target bit waitqueue has wait_bit_queue's on it. Use DEFINE_WAIT_BIT() and __wake_up_bit() instead. Reported by Tomeu Vizoso. Signed-off-by: Tejun Heo Reported-by: Rabin Vincent Cc: Tomeu Vizoso Cc: stable@vger.kernel.org Tested-by: Jesper Nilsson Tested-by: Rabin Vincent --- include/linux/workqueue.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 74db135f9957..f597846ff605 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -70,7 +70,8 @@ enum { /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, + WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last -- cgit v1.2.3 From f54b97ed0b17d3da5f98ba8188cd5646415a922d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Mar 2015 16:37:41 +0000 Subject: irqchip: gicv3-its: Allocate enough memory for the full range of DeviceID The ITS table allocator is only allocating a single page per table. This works fine for most things, but leads to silent lack of interrupt delivery if we end-up with a device that has an ID that is out of the range defined by a single page of memory. Even worse, depending on the page size, behaviour changes, which is not a very good experience. A solution is actually to allocate memory for the full range of ID that the ITS supports. A massive waste memory wise, but at least a safe bet. Tested on a Phytium SoC. Tested-by: Chen Baozi Acked-by: Chen Baozi Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1425659870-11832-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 800544bc7bfd..cbdd440d486d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -166,6 +166,8 @@ #define GITS_TRANSLATER 0x10040 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) #define GITS_CBASER_VALID (1UL << 63) -- cgit v1.2.3 From 7cb991164a46992a499ecdc77b17f8ac94bdb75f Mon Sep 17 00:00:00 2001 From: Yun Wu Date: Fri, 6 Mar 2015 16:37:49 +0000 Subject: irqchip: gicv3-its: Define macros for GITS_CTLR fields Define macros for GITS_CTLR fields to avoid using magic numbers. Acked-by: Marc Zyngier Signed-off-by: Yun Wu Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1425659870-11832-11-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqchip/arm-gic-v3.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index cbdd440d486d..781974afff9f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -166,6 +166,9 @@ #define GITS_TRANSLATER 0x10040 +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_QUIESCENT (1U << 31) + #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) -- cgit v1.2.3 From 3d3801effda19b21012b5d1981e96cc277df85fd Mon Sep 17 00:00:00 2001 From: Michael Turquette Date: Wed, 25 Feb 2015 09:11:01 -0800 Subject: clk: introduce clk_is_match Some drivers compare struct clk pointers as a means of knowing if the two pointers reference the same clock hardware. This behavior is dubious (drivers must not dereference struct clk), but did not cause any regressions until the per-user struct clk patch was merged. Now the test for matching clk's will always fail with per-user struct clk's. clk_is_match is introduced to fix the regression and prevent drivers from comparing the pointers manually. Fixes: 035a61c314eb ("clk: Make clk API return per-user struct clk instances") Cc: Russell King Cc: Shawn Guo Cc: Tomeu Vizoso Signed-off-by: Michael Turquette [arnd@arndb.de: Fix COMMON_CLK=N && HAS_CLK=Y config] Signed-off-by: Arnd Bergmann [sboyd@codeaurora.org: const arguments to clk_is_match() and remove unnecessary ternary operation] Signed-off-by: Stephen Boyd --- include/linux/clk.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 8381bbfbc308..68c16a6bedb3 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -125,6 +125,19 @@ int clk_set_phase(struct clk *clk, int degrees); */ int clk_get_phase(struct clk *clk); +/** + * clk_is_match - check if two clk's point to the same hardware clock + * @p: clk compared against q + * @q: clk compared against p + * + * Returns true if the two struct clk pointers both point to the same hardware + * clock node. Put differently, returns true if struct clk *p and struct clk *q + * share the same struct clk_core object. + * + * Returns false otherwise. Note that two NULL clks are treated as matching. + */ +bool clk_is_match(const struct clk *p, const struct clk *q); + #else static inline long clk_get_accuracy(struct clk *clk) @@ -142,6 +155,11 @@ static inline long clk_get_phase(struct clk *clk) return -ENOTSUPP; } +static inline bool clk_is_match(const struct clk *p, const struct clk *q) +{ + return p == q; +} + #endif /** -- cgit v1.2.3 From c29390c6dfeee0944ac6b5610ebbe403944378fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:42:02 -0700 Subject: xps: must clear sender_cpu before forwarding John reported that my previous commit added a regression on his router. This is because sender_cpu & napi_id share a common location, so get_xps_queue() can see garbage and perform an out of bound access. We need to make sure sender_cpu is cleared before doing the transmit, otherwise any NIC busy poll enabled (skb_mark_napi_id()) can trigger this bug. Signed-off-by: Eric Dumazet Reported-by: John Bisected-by: John Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..f54d6659713a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; +static inline void skb_sender_cpu_clear(struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + skb->sender_cpu = 0; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { -- cgit v1.2.3 From a5af5aa8b67dfdba36c853b70564fd2dfe73d478 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 12 Mar 2015 16:26:11 -0700 Subject: kasan, module, vmalloc: rework shadow allocation for modules Current approach in handling shadow memory for modules is broken. Shadow memory could be freed only after memory shadow corresponds it is no longer used. vfree() called from interrupt context could use memory its freeing to store 'struct llist_node' in it: void vfree(const void *addr) { ... if (unlikely(in_interrupt())) { struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred); if (llist_add((struct llist_node *)addr, &p->list)) schedule_work(&p->wq); Later this list node used in free_work() which actually frees memory. Currently module_memfree() called in interrupt context will free shadow before freeing module's memory which could provoke kernel crash. So shadow memory should be freed after module's memory. However, such deallocation order could race with kasan_module_alloc() in module_alloc(). Free shadow right before releasing vm area. At this point vfree()'d memory is not used anymore and yet not available for other allocations. New VM_KASAN flag used to indicate that vm area has dynamically allocated shadow memory so kasan frees shadow only if it was previously allocated. Signed-off-by: Andrey Ryabinin Acked-by: Rusty Russell Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 +++-- include/linux/vmalloc.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 72ba725ddf9c..5fa48a21d73e 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -5,6 +5,7 @@ struct kmem_cache; struct page; +struct vm_struct; #ifdef CONFIG_KASAN @@ -52,7 +53,7 @@ void kasan_slab_free(struct kmem_cache *s, void *object); #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) int kasan_module_alloc(void *addr, size_t size); -void kasan_module_free(void *addr); +void kasan_free_shadow(const struct vm_struct *vm); #else /* CONFIG_KASAN */ @@ -82,7 +83,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_module_free(void *addr) {} +static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif /* CONFIG_KASAN */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7d7acb35603d..0ec598381f97 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -17,6 +17,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3 From d3733e5c98e952d419e77fa721912f09d15a2806 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 12 Mar 2015 16:26:14 -0700 Subject: kasan, module: move MODULE_ALIGN macro into include/linux/moduleloader.h is more suitable place for this macro. Also change alignment to PAGE_SIZE for CONFIG_KASAN=n as such alignment already assumed in several places. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ---- include/linux/moduleloader.h | 8 ++++++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5fa48a21d73e..5bb074431eb0 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -50,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); -#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) - int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); #else /* CONFIG_KASAN */ -#define MODULE_ALIGN 1 - static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index f7556261fe3c..4d0cb9bba93e 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -84,4 +84,12 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); + +#ifdef CONFIG_KASAN +#include +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else +#define MODULE_ALIGN PAGE_SIZE +#endif + #endif -- cgit v1.2.3 From a697c2efba03ac7bfdbffbba7f0f1aa294f7dee0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 10 Mar 2015 20:31:04 -0700 Subject: of/platform: Fix sparc:allmodconfig build sparc:allmodconfig fails to build with: drivers/built-in.o: In function `platform_bus_init': (.init.text+0x3684): undefined reference to `of_platform_register_reconfig_notifier' of_platform_register_reconfig_notifier is only declared if both OF_ADDRESS and OF_DYNAMIC are configured. Yet, the include file only declares a dummy function if OF_DYNAMIC is not configured. The sparc architecture does not configure OF_ADDRESS, but does configure OF_DYNAMIC, causing above error. Fixes: 801d728c10db ("of/reconfig: Add OF_DYNAMIC notifier for platform_bus_type") Cc: Pantelis Antoniou Signed-off-by: Guenter Roeck Signed-off-by: Rob Herring --- include/linux/of_platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 8a860f096c35..611a691145c4 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -84,7 +84,7 @@ static inline int of_platform_populate(struct device_node *root, static inline void of_platform_depopulate(struct device *parent) { } #endif -#ifdef CONFIG_OF_DYNAMIC +#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS) extern void of_platform_register_reconfig_notifier(void); #else static inline void of_platform_register_reconfig_notifier(void) { } -- cgit v1.2.3 From 8cb2c2dc472775479a1a7e78180955f6f1cb0b0a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 12 Mar 2015 12:55:13 +0100 Subject: livepatch: Fix subtle race with coming and going modules There is a notifier that handles live patches for coming and going modules. It takes klp_mutex lock to avoid races with coming and going patches but it does not keep the lock all the time. Therefore the following races are possible: 1. The notifier is called sometime in STATE_MODULE_COMING. The module is visible by find_module() in this state all the time. It means that new patch can be registered and enabled even before the notifier is called. It might create wrong order of stacked patches, see below for an example. 2. New patch could still see the module in the GOING state even after the notifier has been called. It will try to initialize the related object structures but the module could disappear at any time. There will stay mess in the structures. It might even cause an invalid memory access. This patch solves the problem by adding a boolean variable into struct module. The value is true after the coming and before the going handler is called. New patches need to be applied when the value is true and they need to ignore the module when the value is false. Note that we need to know state of all modules on the system. The races are related to new patches. Therefore we do not know what modules will get patched. Also note that we could not simply ignore going modules. The code from the module could be called even in the GOING state until mod->exit() finishes. If we start supporting patches with semantic changes between function calls, we need to apply new patches to any still usable code. See below for an example. Finally note that the patch solves only the situation when a new patch is registered. There are no such problems when the patch is being removed. It does not matter who disable the patch first, whether the normal disable_patch() or the module notifier. There is nothing to do once the patch is disabled. Alternative solutions: ====================== + reject new patches when a patched module is coming or going; this is ugly + wait with adding new patch until the module leaves the COMING and GOING states; this might be dangerous and complicated; we would need to release kgr_lock in the middle of the patch registration to avoid a deadlock with the coming and going handlers; also we might need a waitqueue for each module which seems to be even bigger overhead than the boolean + stop modules from entering COMING and GOING states; wait until modules leave these states when they are already there; looks complicated; we would need to ignore the module that asked to stop the others to avoid a deadlock; also it is unclear what to do when two modules asked to stop others and both are in COMING state (situation when two new patches are applied) + always register/enable new patches and fix up the potential mess (registered patches order) in klp_module_init(); this is nasty and prone to regressions in the future development + add another MODULE_STATE where the kallsyms are visible but the module is not used yet; this looks too complex; the module states are checked on "many" locations Example of patch stacking breakage: =================================== The notifier could _not_ _simply_ ignore already initialized module objects. For example, let's have three patches (P1, P2, P3) for functions a() and b() where a() is from vmcore and b() is from a module M. Something like: a() b() P1 a1() b1() P2 a2() b2() P3 a3() b3(3) If you load the module M after all patches are registered and enabled. The ftrace ops for function a() and b() has listed the functions in this order: ops_a->func_stack -> list(a3,a2,a1) ops_b->func_stack -> list(b3,b2,b1) , so the pointer to b3() is the first and will be used. Then you might have the following scenario. Let's start with state when patches P1 and P2 are registered and enabled but the module M is not loaded. Then ftrace ops for b() does not exist. Then we get into the following race: CPU0 CPU1 load_module(M) complete_formation() mod->state = MODULE_STATE_COMING; mutex_unlock(&module_mutex); klp_register_patch(P3); klp_enable_patch(P3); # STATE 1 klp_module_notify(M) klp_module_notify_coming(P1); klp_module_notify_coming(P2); klp_module_notify_coming(P3); # STATE 2 The ftrace ops for a() and b() then looks: STATE1: ops_a->func_stack -> list(a3,a2,a1); ops_b->func_stack -> list(b3); STATE2: ops_a->func_stack -> list(a3,a2,a1); ops_b->func_stack -> list(b2,b1,b3); therefore, b2() is used for the module but a3() is used for vmcore because they were the last added. Example of the race with going modules: ======================================= CPU0 CPU1 delete_module() #SYSCALL try_stop_module() mod->state = MODULE_STATE_GOING; mutex_unlock(&module_mutex); klp_register_patch() klp_enable_patch() #save place to switch universe b() # from module that is going a() # from core (patched) mod->exit(); Note that the function b() can be called until we call mod->exit(). If we do not apply patch against b() because it is in MODULE_STATE_GOING, it will call patched a() with modified semantic and things might get wrong. [jpoimboe@redhat.com: use one boolean instead of two] Signed-off-by: Petr Mladek Acked-by: Josh Poimboeuf Acked-by: Rusty Russell Signed-off-by: Jiri Kosina --- include/linux/module.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index b653d7c0a05a..7232fde6a991 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -344,6 +344,10 @@ struct module { unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_LIVEPATCH + bool klp_alive; +#endif + #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; -- cgit v1.2.3 From e03826d5045e81a66a4fad7be9a8ecdaeb7911cf Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Mar 2015 15:56:04 +0530 Subject: regulator: palmas: Correct TPS659038 register definition for REGEN2 The register offset for REGEN2_CTRL in different for TPS659038 chip as when compared with other Palmas family PMICs. In the case of TPS659038 the wrong offset pointed to PLLEN_CTRL and was causing a hang. Correcting the same. Signed-off-by: Keerthy Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/linux/mfd/palmas.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index fb0390a1a498..ee7b1ce7a6f8 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -2999,6 +2999,9 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS659038 regen2_ctrl offset iss different from palmas */ +#define TPS659038_REGEN2_CTRL 0x12 + /* TPS65917 Interrupt registers */ /* Registers for function INTERRUPT */ -- cgit v1.2.3 From ad41faa88e39af451427c921a0f8b441e104b6fa Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 17 Mar 2015 11:16:00 +0100 Subject: netdevice.h: fix ndo_bridge_* comments The argument 'flags' was missing in ndo_bridge_setlink(). ndo_bridge_dellink() was missing. Fixes: 407af3299ef1 ("bridge: Add netlink interface to configure vlans on bridge ports") Fixes: add511b38266 ("bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink") CC: Vlad Yasevich CC: Roopa Prabhu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 429d1790a27e..dcf6ec27739b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -965,9 +965,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * - * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) + * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev, u32 filter_mask) + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) -- cgit v1.2.3 From cf39284d41f67964cf42b21bb386c012cf5b7f65 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 18 Mar 2015 08:57:41 +0800 Subject: regulator: Fix documentation for regmap in the config dev_get_regulator() does not exist, fix the typo. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..045f709cb89b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -316,7 +316,7 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. * @ena_gpio_initialized: GPIO controlling regulator enable was properly * initialized, meaning that >= 0 is a valid gpio -- cgit v1.2.3 From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 12 Mar 2015 10:32:18 -0700 Subject: ata: Add a new flag to destinguish sas controller SAS controller has its own tag allocation, which doesn't directly match to ATA tag, so SAS and SATA have different code path for ata tags. Originally we use port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set ->scsi_host too, so we can't use it for the destinguish, we add a new flag for this purpose. Without this patch, the following oops can happen because scsi-mq uses a host-wide tag map shared among all devices with some integer tag values >= ATA_MAX_QUEUE. These unexpectedly high tag values cause __ata_qc_from_tag() to return NULL, which is then dereferenced in ata_qc_new_init(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] ata_qc_new_init+0x3e/0x120 PGD 32adf0067 PUD 32adf1067 PMD 0 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp eeprom w83795 i2c_i801 CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1 Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b 05/04/12 task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000 RIP: 0010:[] [] ata_qc_new_init+0x3e/0x120 RSP: 0018:ffff88032a067858 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230 RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000 R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000 FS: 0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0 Stack: ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978 ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909 Call Trace: [] ata_scsi_translate+0x3d/0x1b0 [] ata_sas_queuecmd+0x149/0x2a0 [] sas_queuecommand+0xa0/0x1f0 [libsas] [] scsi_dispatch_cmd+0xd4/0x1a0 [] scsi_queue_rq+0x66f/0x7f0 [] __blk_mq_run_hw_queue+0x208/0x3f0 [] blk_mq_run_hw_queue+0x88/0xc0 [] blk_mq_insert_request+0xc4/0x130 [] blk_execute_rq_nowait+0x73/0x160 [] sg_common_write+0x3da/0x720 [sg] [] sg_new_write+0x250/0x360 [sg] [] sg_write+0x13b/0x450 [sg] [] vfs_write+0xd1/0x1b0 [] SyS_write+0x54/0xc0 [] system_call_fastpath+0x12/0x17 tj: updated description. Fixes: 12cb5ce101ab ("libata: use blk taging") Reported-and-tested-by: Tony Battersby Signed-off-by: Shaohua Li Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa64ffe..6b08cc106c21 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -232,6 +232,7 @@ enum { * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From 074c238177a75f5e79af3b2cb6a84e54823ef950 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:42 -0700 Subject: mm: numa: slow PTE scan rate if migration failures occur Dave Chinner reported the following on https://lkml.org/lkml/2015/3/1/226 Across the board the 4.0-rc1 numbers are much slower, and the degradation is far worse when using the large memory footprint configs. Perf points straight at the cause - this is from 4.0-rc1 on the "-o bhash=101073" config: - 56.07% 56.07% [kernel] [k] default_send_IPI_mask_sequence_phys - default_send_IPI_mask_sequence_phys - 99.99% physflat_send_IPI_mask - 99.37% native_send_call_func_ipi smp_call_function_many - native_flush_tlb_others - 99.85% flush_tlb_page ptep_clear_flush try_to_unmap_one rmap_walk try_to_unmap migrate_pages migrate_misplaced_page - handle_mm_fault - 99.73% __do_page_fault trace_do_page_fault do_async_page_fault + async_page_fault 0.63% native_send_call_func_single_ipi generic_exec_single smp_call_function_single This is showing excessive migration activity even though excessive migrations are meant to get throttled. Normally, the scan rate is tuned on a per-task basis depending on the locality of faults. However, if migrations fail for any reason then the PTE scanner may scan faster if the faults continue to be remote. This means there is higher system CPU overhead and fault trapping at exactly the time we know that migrations cannot happen. This patch tracks when migration failures occur and slows the PTE scanner. Signed-off-by: Mel Gorman Reported-by: Dave Chinner Tested-by: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..a419b65770d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1625,11 +1625,11 @@ struct task_struct { /* * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local. The task scan period is adapted - * based on the locality of the faults with different weights - * depending on whether they were shared or private faults + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[2]; + unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ @@ -1719,6 +1719,7 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 #define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); -- cgit v1.2.3 From dbbc14775ae395a50d91f22904670ca4c9297542 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2015 14:33:33 -0400 Subject: SUNRPC: Introduce missing well-known netids Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/msg_prot.h | 8 +++++++- include/linux/sunrpc/xprtrdma.h | 5 ----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..807371357160 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -142,12 +142,18 @@ typedef __be32 rpc_fraghdr; (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) /* - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + * Well-known netids. See: + * + * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_RDMA "rdma" +#define RPCBIND_NETID_SCTP "sctp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_RDMA6 "rdma6" +#define RPCBIND_NETID_SCTP6 "sctp6" #define RPCBIND_NETID_LOCAL "local" /* diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 64a0a0a97b23..c984c85981ea 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -40,11 +40,6 @@ #ifndef _LINUX_SUNRPC_XPRTRDMA_H #define _LINUX_SUNRPC_XPRTRDMA_H -/* - * rpcbind (v3+) RDMA netid. - */ -#define RPCBIND_NETID_RDMA "rdma" - /* * Constants. Max RPC/NFS header is big enough to account for * additional marshaling buffers passed down by Linux client. -- cgit v1.2.3