From f750847daa22bee9cbb3309f11e8c2eef7bbe5c6 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 23 Aug 2016 16:20:38 +0200 Subject: mm: introduce get_task_exe_file commit cd81a9170e69e018bbaba547c1fd85a585f5697a upstream. For more convenient access if one has a pointer to the task. As a minor nit take advantage of the fact that only task lock + rcu are needed to safely grab ->exe_file. This saves mm refcount dance. Use the helper in proc_exe_link. Signed-off-by: Mateusz Guzik Acked-by: Konstantin Khlebnikov Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a761248d01e..cfebb742ee18 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1898,6 +1898,7 @@ extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); +extern struct file *get_task_exe_file(struct task_struct *task); extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, -- cgit v1.2.3 From 023e76b1d1fd4857ba6af6e99dfbefd667aae6d1 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 17 Aug 2016 17:43:01 +0530 Subject: iio: adc: ti_am335x_adc: Increase timeout value waiting for ADC sample commit 7175cce1c3f1d8c8840d2004f78f96a3904249b5 upstream. Now that open delay and sample delay for each channel is configurable via DT, the default IDLE_TIMEOUT value is not enough as this is calculated based on hardcoded macros. This results in driver returning EBUSY sometimes. Fix this by increasing the timeout value based on maximum value possible to open delay and sample delays for each channel. Fixes: 5dc11e810676e ("iio: adc: ti_am335x_adc: make sample delay, open delay, averaging DT parameters") Signed-off-by: Vignesh R Acked-by: Lee Jones Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/ti_am335x_tscadc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 1fd50dcfe47c..175c82699e9d 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -138,16 +138,16 @@ /* * time in us for processing a single channel, calculated as follows: * - * num cycles = open delay + (sample delay + conv time) * averaging + * max num cycles = open delay + (sample delay + conv time) * averaging * - * num cycles: 152 + (1 + 13) * 16 = 376 + * max num cycles: 262143 + (255 + 13) * 16 = 266431 * * clock frequency: 26MHz / 8 = 3.25MHz * clock period: 1 / 3.25MHz = 308ns * - * processing time: 376 * 308ns = 116us + * max processing time: 266431 * 308ns = 83ms(approx) */ -#define IDLE_TIMEOUT 116 /* microsec */ +#define IDLE_TIMEOUT 83 /* milliseconds */ #define TSCADC_CELLS 2 -- cgit v1.2.3 From 59e62eb42a5923a095c7616ef9997327e0bfdb70 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Nov 2015 15:14:23 +0100 Subject: mmc: dw_mmc: use resource_size_t to store physical address commit 260b31643691e8a58683a4ccc3bdf7abfd86f54a upstream. The dw_mmc driver stores the physical address of the MMIO registers in a pointer, which requires the use of type casts, and is actually broken if anyone ever has this device on a 32-bit SoC in registers above 4GB. Gcc warns about this possibility when the driver is built with ARM LPAE enabled: mmc/host/dw_mmc.c: In function 'dw_mci_edmac_start_dma': mmc/host/dw_mmc.c:702:17: warning: cast from pointer to integer of different size cfg.dst_addr = (dma_addr_t)(host->phy_regs + fifo_offset); ^ mmc/host/dw_mmc-pltfm.c: In function 'dw_mci_pltfm_register': mmc/host/dw_mmc-pltfm.c:63:19: warning: cast to pointer from integer of different size host->phy_regs = (void *)(regs->start); This changes the code to use resource_size_t, which gets rid of the warning, the bug and the useless casts. Signed-off-by: Arnd Bergmann Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- include/linux/mmc/dw_mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index f67b2ec18e6d..7776afb0ffa5 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -172,7 +172,7 @@ struct dw_mci { /* For edmac */ struct dw_mci_dma_slave *dms; /* Registers's physical base address */ - void *phy_regs; + resource_size_t phy_regs; u32 cmd_status; u32 data_status; -- cgit v1.2.3 From b214985cfaeda53495f9f115cb946b42432f4d6d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Jan 2016 11:43:44 +0100 Subject: net: simplify napi_synchronize() to avoid warnings commit facc432faa59414bd7c60c307ff1645154a66c98 upstream. The napi_synchronize() function is defined twice: The definition for SMP builds waits for other CPUs to be done, while the uniprocessor variant just contains a barrier and ignores its argument. In the mvneta driver, this leads to a warning about an unused variable when we lookup the NAPI struct of another CPU and then don't use it: ethernet/marvell/mvneta.c: In function 'mvneta_percpu_notifier': ethernet/marvell/mvneta.c:2910:30: error: unused variable 'other_port' [-Werror=unused-variable] There are no other CPUs on a UP build, so that code never runs, but gcc does not know this. The nicest solution seems to be to turn the napi_synchronize() helper into an inline function for the UP case as well, as that leads gcc to not complain about the argument being unused. Once we do that, we can also combine the two cases into a single function definition and use if(IS_ENABLED()) rather than #ifdef to make it look a bit nicer. The warning first came up in linux-4.4, but I failed to catch it earlier. Signed-off-by: Arnd Bergmann Fixes: f86428854480 ("net: mvneta: Statically assign queues to CPUs") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 04c068e55353..b97d6823ef3c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -511,7 +511,6 @@ static inline void napi_enable(struct napi_struct *n) clear_bit(NAPI_STATE_NPSVC, &n->state); } -#ifdef CONFIG_SMP /** * napi_synchronize - wait until NAPI is not running * @n: napi context @@ -522,12 +521,12 @@ static inline void napi_enable(struct napi_struct *n) */ static inline void napi_synchronize(const struct napi_struct *n) { - while (test_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); + if (IS_ENABLED(CONFIG_SMP)) + while (test_bit(NAPI_STATE_SCHED, &n->state)) + msleep(1); + else + barrier(); } -#else -# define napi_synchronize(n) barrier() -#endif enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, -- cgit v1.2.3 From 17b54ccf496f88d1a1d404f3e56710f692c8afbd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:28 +0200 Subject: genirq: Provide irq_gc_{lock_irqsave,unlock_irqrestore}() helpers commit ebf9ff753c041b296241990aef76163bbb2cc9c8 upstream. Some irqchip drivers need to take the generic chip lock outside of the irq context. Provide the irq_gc_{lock_irqsave,unlock_irqrestore}() helpers to allow one to disable irqs while entering a critical section protected by gc->lock. Note that we do not provide optimized version of these helpers for !SMP, because they are not called from the hot-path. [ tglx: Added a comment when these helpers should be [not] used ] Signed-off-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Nicolas Ferre Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-1-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c1c96786248..f7cade00c525 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -916,6 +916,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif +/* + * The irqsave variants are for usage in non interrupt code. Do not use + * them in irq_chip callbacks. Use irq_gc_lock() instead. + */ +#define irq_gc_lock_irqsave(gc, flags) \ + raw_spin_lock_irqsave(&(gc)->lock, flags) + +#define irq_gc_unlock_irqrestore(gc, flags) \ + raw_spin_unlock_irqrestore(&(gc)->lock, flags) + static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { -- cgit v1.2.3 From 99526912c934f848e5dc1065ec6a1c1c33b1a8d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Sep 2016 00:11:45 +0100 Subject: fix iov_iter_fault_in_readable() commit d4690f1e1cdabb4d61207b6787b1605a0dc0aeab upstream. ... by turning it into what used to be multipages counterpart Signed-off-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 8b01e1c3c614..5f9c59da978b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); +#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3 From df127725783f45e0f7681f7def2ca259ee9ef4ae Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 15 Jan 2016 16:57:58 -0800 Subject: include/linux/kernel.h: change abs() macro so it uses consistent return type commit 8f57e4d930d48217268315898212518d4d3e0773 upstream. Rewrite abs() so that its return type does not depend on the architecture and no unexpected type conversion happen inside of it. The only conversion is from unsigned to signed type. char is left as a return type but treated as a signed type regradless of it's actual signedness. With the old version, int arguments were promoted to long and depending on architecture a long argument might result in s64 or long return type (which may or may not be the same). This came after some back and forth with Nicolas. The current macro has different return type (for the same input type) depending on architecture which might be midly iritating. An alternative version would promote to int like so: #define abs(x) __abs_choose_expr(x, long long, \ __abs_choose_expr(x, long, \ __builtin_choose_expr( \ sizeof(x) <= sizeof(int), \ ({ int __x = (x); __x<0?-__x:__x; }), \ ((void)0)))) I have no preference but imagine Linus might. :] Nicolas argument against is that promoting to int causes iconsistent behaviour: int main(void) { unsigned short a = 0, b = 1, c = a - b; unsigned short d = abs(a - b); unsigned short e = abs(c); printf("%u %u\n", d, e); // prints: 1 65535 } Then again, no sane person expects consistent behaviour from C integer arithmetic. ;) Note: __builtin_types_compatible_p(unsigned char, char) is always false, and __builtin_types_compatible_p(signed char, char) is also always false. Signed-off-by: Michal Nazarewicz Reviewed-by: Nicolas Pitre Cc: Srinivas Pandruvada Cc: Wey-Yi Guy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 924853d33a13..e571e592e53a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -202,26 +202,26 @@ extern int _cond_resched(void); /** * abs - return absolute value of an argument - * @x: the value. If it is unsigned type, it is converted to signed type first - * (s64, long or int depending on its size). + * @x: the value. If it is unsigned type, it is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. * - * Return: an absolute value of x. If x is 64-bit, macro's return type is s64, - * otherwise it is signed long. + * Return: an absolute value of x. */ -#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \ - s64 __x = (x); \ - (__x < 0) ? -__x : __x; \ - }), ({ \ - long ret; \ - if (sizeof(x) == sizeof(long)) { \ - long __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } else { \ - int __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } \ - ret; \ - })) +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) -- cgit v1.2.3 From f357a79839f95f5ea7baf1c1366d64796d833bca Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Thu, 1 Sep 2016 22:18:34 -0700 Subject: bonding: Fix bonding crash [ Upstream commit 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c ] Following few steps will crash kernel - (a) Create bonding master > modprobe bonding miimon=50 (b) Create macvlan bridge on eth2 > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \ type macvlan (c) Now try adding eth2 into the bond > echo +eth2 > /sys/class/net/bond0/bonding/slaves Bonding does lots of things before checking if the device enslaved is busy or not. In this case when the notifier call-chain sends notifications, the bond_netdev_event() assumes that the rx_handler /rx_handler_data is registered while the bond_enslave() hasn't progressed far enough to register rx_handler for the new slave. This patch adds a rx_handler check that can be performed right at the beginning of the enslave code to avoid getting into this situation. Signed-off-by: Mahesh Bandewar Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b97d6823ef3c..4e9c75226f07 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3036,6 +3036,7 @@ static inline void napi_free_frags(struct napi_struct *napi) napi->skb = NULL; } +bool netdev_is_rx_handler_busy(struct net_device *dev); int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, void *rx_handler_data); -- cgit v1.2.3 From 8c945f5aac28a81f20f6a5208a6ccab43b8e7a89 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 27 Aug 2016 17:33:03 +0100 Subject: net: smc91x: fix SMC accesses [ Upstream commit 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb ] Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") broke some ARM platforms through several mistakes. Firstly, the access size must correspond to the following rule: (a) at least one of 16-bit or 8-bit access size must be supported (b) 32-bit accesses are optional, and may be enabled in addition to the above. Secondly, it provides no emulation of 16-bit accesses, instead blindly making 16-bit accesses even when the platform specifies that only 8-bit is supported. Reorganise smc91x.h so we can make use of the existing 16-bit access emulation already provided - if 16-bit accesses are supported, use 16-bit accesses directly, otherwise if 8-bit accesses are supported, use the provided 16-bit access emulation. If neither, BUG(). This exactly reflects the driver behaviour prior to the commit being fixed. Since the conversion incorrectly cut down the available access sizes on several platforms, we also need to go through every platform and fix up the overly-restrictive access size: Arnd assumed that if a platform can perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access size needed to be specified - not so, all available access sizes must be specified. This likely fixes some performance regressions in doing this: if a platform does not support 8-bit accesses, 8-bit accesses have been emulated by performing a 16-bit read-modify-write access. Tested on the Intel Assabet/Neponset platform, which supports only 8-bit accesses, which was broken by the original commit. Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") Signed-off-by: Russell King Tested-by: Robert Jarzmik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/smc91x.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index 76199b75d584..e302c447e057 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -1,6 +1,16 @@ #ifndef __SMC91X_H__ #define __SMC91X_H__ +/* + * These bits define which access sizes a platform can support, rather + * than the maximal access size. So, if your platform can do 16-bit + * and 32-bit accesses to the SMC91x device, but not 8-bit, set both + * SMC91X_USE_16BIT and SMC91X_USE_32BIT. + * + * The SMC91x driver requires at least one of SMC91X_USE_8BIT or + * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is + * an invalid configuration. + */ #define SMC91X_USE_8BIT (1 << 0) #define SMC91X_USE_16BIT (1 << 1) #define SMC91X_USE_32BIT (1 << 2) -- cgit v1.2.3 From af426ec184a38d257457a22554542d4f9bb7eecd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:27 -0700 Subject: fsnotify: add a way to stop queueing events on group shutdown commit 12703dbfeb15402260e7554d32a34ac40c233990 upstream. Implement a function that can be called when a group is being shutdown to stop queueing new events to the group. Fanotify will use this. Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-2-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/fsnotify_backend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 533c4408529a..5dfba9a8a7be 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,7 @@ struct fsnotify_group { #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; + bool shutdown; /* group is being shut down, don't queue more events */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ @@ -308,6 +309,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); +/* group destruction begins, stop queuing new events */ +extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ -- cgit v1.2.3 From 6e67de3922f25ae43fc86be362d740d88167ae7f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:30 -0700 Subject: fanotify: fix list corruption in fanotify_get_response() commit 96d41019e3ac55f6f0115b0ce97e4f24a3d636d2 upstream. fanotify_get_response() calls fsnotify_remove_event() when it finds that group is being released from fanotify_release() (bypass_perm is set). However the event it removes need not be only in the group's notification queue but it can have already moved to access_list (userspace read the event before closing the fanotify instance fd) which is protected by a different lock. Thus when fsnotify_remove_event() races with fanotify_release() operating on access_list, the list can get corrupted. Fix the problem by moving all the logic removing permission events from the lists to one place - fanotify_release(). Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-3-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/fsnotify_backend.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5dfba9a8a7be..850d8822e8ff 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -180,7 +180,6 @@ struct fsnotify_group { spinlock_t access_lock; struct list_head access_list; wait_queue_head_t access_waitq; - atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; @@ -323,8 +322,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); -/* Remove passed event from groups notification queue */ -extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.2.3 From 3f5d8326a870729ae21aa0a6b132cf8d9e9fcc1e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Sep 2016 20:07:42 +0100 Subject: fix fault_in_multipages_...() on architectures with no-op access_ok() commit e23d4159b109167126e5bcd7f3775c95de7fee47 upstream. Switching iov_iter fault-in to multipages variants has exposed an old bug in underlying fault_in_multipages_...(); they break if the range passed to them wraps around. Normally access_ok() done by callers will prevent such (and it's a guaranteed EFAULT - ERR_PTR() values fall into such a range and they should not point to any valid objects). However, on architectures where userland and kernel live in different MMU contexts (e.g. s390) access_ok() is a no-op and on those a range with a wraparound can reach fault_in_multipages_...(). Since any wraparound means EFAULT there, the fix is trivial - turn those while (uaddr <= end) ... into if (unlikely(uaddr > end)) return -EFAULT; do ... while (uaddr <= end); Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pagemap.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 26eabf5ec718..fbfadba81c5a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -601,56 +601,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) */ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { - int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; + if (unlikely(uaddr > end)) + return -EFAULT; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ - while (uaddr <= end) { - ret = __put_user(0, uaddr); - if (ret != 0) - return ret; + do { + if (unlikely(__put_user(0, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) - ret = __put_user(0, end); + return __put_user(0, end); - return ret; + return 0; } static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { volatile char c; - int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; - while (uaddr <= end) { - ret = __get_user(c, uaddr); - if (ret != 0) - return ret; + if (unlikely(uaddr > end)) + return -EFAULT; + + do { + if (unlikely(__get_user(c, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) { - ret = __get_user(c, end); - (void)c; + return __get_user(c, end); } - return ret; + return 0; } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- cgit v1.2.3 From 1b4b2f16f6759bf5b0937a475e82f677e813f304 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 31 Aug 2016 16:04:21 -0700 Subject: usercopy: fold builtin_const check into inline function Instead of having each caller of check_object_size() need to remember to check for a const size parameter, move the check into check_object_size() itself. This actually matches the original implementation in PaX, though this commit cleans up the now-redundant builtin_const() calls in the various architectures. Signed-off-by: Kees Cook (cherry picked from commit 81409e9e28058811c9ea865345e1753f8f677e44) Signed-off-by: Alex Shi --- include/linux/thread_info.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 0ae29ff9ccfd..eded095fe81e 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -161,7 +161,8 @@ extern void __check_object_size(const void *ptr, unsigned long n, static inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { - __check_object_size(ptr, n, to_user); + if (!__builtin_constant_p(n)) + __check_object_size(ptr, n, to_user); } #else static inline void check_object_size(const void *ptr, unsigned long n, -- cgit v1.2.3 From 7677956e4094595732df548681ea9c4d66821bdb Mon Sep 17 00:00:00 2001 From: Sergei Miroshnichenko Date: Wed, 7 Sep 2016 16:51:12 +0300 Subject: can: dev: fix deadlock reported after bus-off commit 9abefcb1aaa58b9d5aa40a8bb12c87d02415e4c8 upstream. A timer was used to restart after the bus-off state, leading to a relatively large can_restart() executed in an interrupt context, which in turn sets up pinctrl. When this happens during system boot, there is a high probability of grabbing the pinctrl_list_mutex, which is locked already by the probe() of other device, making the kernel suspect a deadlock condition [1]. To resolve this issue, the restart_timer is replaced by a delayed work. [1] https://github.com/victronenergy/venus/issues/24 Signed-off-by: Sergei Miroshnichenko Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- include/linux/can/dev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5261751f6bd4..5f5270941ba0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,7 @@ enum can_mode { * CAN common private data */ struct can_priv { + struct net_device *dev; struct can_device_stats can_stats; struct can_bittiming bittiming, data_bittiming; @@ -47,7 +48,7 @@ struct can_priv { u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; - struct timer_list restart_timer; + struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); -- cgit v1.2.3 From 8e8a07f900d03c1230142749c0f01b6ef2cae8f7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 2 Jun 2016 12:05:12 +0100 Subject: nvmem: Declare nvmem_cell_read() consistently commit a6c50912508d80164a5e607993b617be85a46d73 upstream. nvmem_cell_read() is declared as void * if CONFIG_NVMEM is enabled, and as char * otherwise. This can result in a build warning if CONFIG_NVMEM is not enabled and a caller asigns the result to a type other than char * without using a typecast. Use a consistent declaration to avoid the problem. Fixes: e2a5402ec7c6 ("nvmem: Add nvmem_device based consumer apis.") Cc: Srinivas Kandagatla Signed-off-by: Guenter Roeck Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 9bb77d3ed6e0..c2256d746543 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -74,7 +74,7 @@ static inline void nvmem_cell_put(struct nvmem_cell *cell) { } -static inline char *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) +static inline void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3 From 70cd763eb1574cac07138be91f474a661e02d694 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 25 Aug 2016 15:16:51 -0700 Subject: sysctl: handle error writing UINT_MAX to u32 fields commit e7d316a02f683864a12389f8808570e37fb90aa3 upstream. We have scripts which write to certain fields on 3.18 kernels but this seems to be failing on 4.4 kernels. An entry which we write to here is xfrm_aevent_rseqth which is u32. echo 4294967295 > /proc/sys/net/core/xfrm_aevent_rseqth Commit 230633d109e3 ("kernel/sysctl.c: detect overflows when converting to int") prevented writing to sysctl entries when integer overflow occurs. However, this does not apply to unsigned integers. Heinrich suggested that we introduce a new option to handle 64 bit limits and set min as 0 and max as UINT_MAX. This might not work as it leads to issues similar to __do_proc_doulongvec_minmax. Alternatively, we would need to change the datatype of the entry to 64 bit. static int __do_proc_doulongvec_minmax(void *data, struct ctl_table { i = (unsigned long *) data; //This cast is causing to read beyond the size of data (u32) vleft = table->maxlen / sizeof(unsigned long); //vleft is 0 because maxlen is sizeof(u32) which is lesser than sizeof(unsigned long) on x86_64. Introduce a new proc handler proc_douintvec. Individual proc entries will need to be updated to use the new handler. [akpm@linux-foundation.org: coding-style fixes] Fixes: 230633d109e3 ("kernel/sysctl.c:detect overflows when converting to int") Link: http://lkml.kernel.org/r/1471479806-5252-1-git-send-email-subashab@codeaurora.org Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Heinrich Schuchardt Cc: Kees Cook Cc: "David S. Miller" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/sysctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index fa7bc29925c9..ef17db6caaed 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -41,6 +41,8 @@ extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int proc_douintvec(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_jiffies(struct ctl_table *, int, -- cgit v1.2.3 From 85460b112db6969a2f3b889b241a2ca1a0778f03 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 8 Sep 2016 00:42:25 +0900 Subject: net: inet: diag: expose the socket mark to privileged processes. This adds the capability for a process that has CAP_NET_ADMIN on a socket to see the socket mark in socket dumps. Commit a52e95abf772 ("net: diag: allow socket bytecode filters to match socket marks") recently gave privileged processes the ability to filter socket dumps based on mark. This patch is complementary: it ensures that the mark is also passed to userspace in the socket's netlink attributes. It is useful for tools like ss which display information about sockets. [backport of net-next d545caca827b65aab557a9e9dcdcf1e5a3823c2d] Change-Id: I33336ed9c3ee3fb78fe05c4c47b7fd18c6e33ef1 Tested: https://android-review.googlesource.com/270210 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7c27fa1030e8..795852dc3434 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + const struct nlmsghdr *unlh, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, -- cgit v1.2.3 From a400076119c0ce8228bf36a4f8fdb69b1c242523 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 30 Nov 2015 13:28:15 +0100 Subject: UPSTREAM: mm/memblock: add MEMBLOCK_NOMAP attribute to memblock memory table This introduces the MEMBLOCK_NOMAP attribute and the required plumbing to make it usable as an indicator that some parts of normal memory should not be covered by the kernel direct mapping. It is up to the arch to actually honor the attribute when laying out this mapping, but the memblock code itself is modified to disregard these regions for allocations and other general use. Cc: linux-mm@kvack.org Cc: Alexander Kuleshov Cc: Andrew Morton Reviewed-by: Matt Fleming Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Change-Id: I55cd3abdf514ac54c071fa0037d8dac73bda798d (cherry picked from commit bf3d3cc580f9960883ebf9ea05868f336d9491c2) Signed-off-by: Sami Tolvanen --- include/linux/memblock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 24daf8fc4d7c..fec66f86eeff 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -25,6 +25,7 @@ enum { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ MEMBLOCK_MIRROR = 0x2, /* mirrored region */ + MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ }; struct memblock_region { @@ -82,6 +83,7 @@ bool memblock_overlaps_region(struct memblock_type *type, int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); +int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); ulong choose_memblock_flags(void); /* Low level functions */ @@ -184,6 +186,11 @@ static inline bool memblock_is_mirror(struct memblock_region *m) return m->flags & MEMBLOCK_MIRROR; } +static inline bool memblock_is_nomap(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_NOMAP; +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); @@ -319,6 +326,7 @@ phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); int memblock_is_memory(phys_addr_t addr); +int memblock_is_map_memory(phys_addr_t addr); int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); int memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3 From f79fcde0efa7a7791ef5a98782c868c6091703d4 Mon Sep 17 00:00:00 2001 From: Mohan Srinivasan Date: Mon, 3 Oct 2016 16:17:34 -0700 Subject: Fix a build breakage in IO latency hist code. Fix a build breakage where MMC is enabled, but BLOCK is not. Change-Id: I0eb422d12264f0371f3368ae7c37342ef9efabaa Signed-off-by: Mohan Srinivasan --- include/linux/mmc/core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 3349f0676acb..0860efd6e1be 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -137,7 +137,9 @@ struct mmc_request { void (*done)(struct mmc_request *);/* completion function */ struct mmc_host *host; ktime_t io_start; +#ifdef CONFIG_BLOCK int lat_hist_enabled; +#endif }; struct mmc_card; -- cgit v1.2.3 From 0114e3e52730b76e0976a43ef496d4925eda1893 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2016 08:26:56 +0300 Subject: mfd: 88pm80x: Double shifting bug in suspend/resume commit 9a6dc644512fd083400a96ac4a035ac154fe6b8d upstream. set_bit() and clear_bit() take the bit number so this code is really doing "1 << (1 << irq)" which is a double shift bug. It's done consistently so it won't cause a problem unless "irq" is more than 4. Fixes: 70c6cce04066 ('mfd: Support 88pm80x in 80x driver') Signed-off-by: Dan Carpenter Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/88pm80x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index d409ceb2231e..c118a7ec94d6 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -350,7 +350,7 @@ static inline int pm80x_dev_suspend(struct device *dev) int irq = platform_get_irq(pdev, 0); if (device_may_wakeup(dev)) - set_bit((1 << irq), &chip->wu_flag); + set_bit(irq, &chip->wu_flag); return 0; } @@ -362,7 +362,7 @@ static inline int pm80x_dev_resume(struct device *dev) int irq = platform_get_irq(pdev, 0); if (device_may_wakeup(dev)) - clear_bit((1 << irq), &chip->wu_flag); + clear_bit(irq, &chip->wu_flag); return 0; } -- cgit v1.2.3 From 1294d355881cc5c3421d24fee512f16974addb6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Oct 2016 13:07:36 -0700 Subject: mm: remove gup_flags FOLL_WRITE games from __get_user_pages() commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream. This is an ancient bug that was actually attempted to be fixed once (badly) by me eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race for write access") but that was then undone due to problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). In the meantime, the s390 situation has long been fixed, and we can now fix it by checking the pte_dirty() bit properly (and do it better). The s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement software dirty bits") which made it into v3.9. Earlier kernels will have to look at the page state itself. Also, the VM has become more scalable, and what used a purely theoretical race back then has become easier to trigger. To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, we already did a COW" rather than play racy games with FOLL_WRITE that is very fundamental, and then use the pte dirty flag to validate that the FOLL_COW flag is still valid. Reported-and-tested-by: Phil "not Paul" Oester Acked-by: Hugh Dickins Reviewed-by: Michal Hocko Cc: Andy Lutomirski Cc: Kees Cook Cc: Oleg Nesterov Cc: Willy Tarreau Cc: Nick Piggin Cc: Greg Thelen Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cfebb742ee18..f0ffa01c90d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2112,6 +2112,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ +#define FOLL_COW 0x4000 /* internal GUP flag */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From ef2209ec3ee8a3c9238d329d1e6afa9b22766962 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 4 Jan 2016 15:37:32 +0100 Subject: ARM: 8478/2: arm/arm64: add arm-smccc Adds helpers to do SMC and HVC based on ARM SMC Calling Convention. CONFIG_HAVE_ARM_SMCCC is enabled for architectures that may support the SMC or HVC instruction. It's the responsibility of the caller to know if the SMC instruction is supported by the platform. This patch doesn't provide an implementation of the declared functions. Later patches will bring in implementations and set CONFIG_HAVE_ARM_SMCCC for ARM and ARM64 respectively. Reviewed-by: Lorenzo Pieralisi Signed-off-by: Jens Wiklander Signed-off-by: Russell King (cherry picked from commit 98dd64f34f47ce19b388d9015f767f48393a81eb) Signed-off-by: Alex Shi --- include/linux/arm-smccc.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/arm-smccc.h (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h new file mode 100644 index 000000000000..b5abfda80465 --- /dev/null +++ b/include/linux/arm-smccc.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __LINUX_ARM_SMCCC_H +#define __LINUX_ARM_SMCCC_H + +#include +#include + +/* + * This file provides common defines for ARM SMC Calling Convention as + * specified in + * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html + */ + +#define ARM_SMCCC_STD_CALL 0 +#define ARM_SMCCC_FAST_CALL 1 +#define ARM_SMCCC_TYPE_SHIFT 31 + +#define ARM_SMCCC_SMC_32 0 +#define ARM_SMCCC_SMC_64 1 +#define ARM_SMCCC_CALL_CONV_SHIFT 30 + +#define ARM_SMCCC_OWNER_MASK 0x3F +#define ARM_SMCCC_OWNER_SHIFT 24 + +#define ARM_SMCCC_FUNC_MASK 0xFFFF + +#define ARM_SMCCC_IS_FAST_CALL(smc_val) \ + ((smc_val) & (ARM_SMCCC_FAST_CALL << ARM_SMCCC_TYPE_SHIFT)) +#define ARM_SMCCC_IS_64(smc_val) \ + ((smc_val) & (ARM_SMCCC_SMC_64 << ARM_SMCCC_CALL_CONV_SHIFT)) +#define ARM_SMCCC_FUNC_NUM(smc_val) ((smc_val) & ARM_SMCCC_FUNC_MASK) +#define ARM_SMCCC_OWNER_NUM(smc_val) \ + (((smc_val) >> ARM_SMCCC_OWNER_SHIFT) & ARM_SMCCC_OWNER_MASK) + +#define ARM_SMCCC_CALL_VAL(type, calling_convention, owner, func_num) \ + (((type) << ARM_SMCCC_TYPE_SHIFT) | \ + ((calling_convention) << ARM_SMCCC_CALL_CONV_SHIFT) | \ + (((owner) & ARM_SMCCC_OWNER_MASK) << ARM_SMCCC_OWNER_SHIFT) | \ + ((func_num) & ARM_SMCCC_FUNC_MASK)) + +#define ARM_SMCCC_OWNER_ARCH 0 +#define ARM_SMCCC_OWNER_CPU 1 +#define ARM_SMCCC_OWNER_SIP 2 +#define ARM_SMCCC_OWNER_OEM 3 +#define ARM_SMCCC_OWNER_STANDARD 4 +#define ARM_SMCCC_OWNER_TRUSTED_APP 48 +#define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 +#define ARM_SMCCC_OWNER_TRUSTED_OS 50 +#define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 + +/** + * struct arm_smccc_res - Result from SMC/HVC call + * @a0-a3 result values from registers 0 to 3 + */ +struct arm_smccc_res { + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; +}; + +/** + * arm_smccc_smc() - make SMC calls + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This function is used to make SMC calls following SMC Calling Convention. + * The content of the supplied param are copied to registers 0 to 7 prior + * to the SMC instruction. The return values are updated with the content + * from register 0 to 3 on return from the SMC instruction. + */ +asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res); + +/** + * arm_smccc_hvc() - make HVC calls + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This function is used to make HVC calls following SMC Calling + * Convention. The content of the supplied param are copied to registers 0 + * to 7 prior to the HVC instruction. The return values are updated with + * the content from register 0 to 3 on return from the HVC instruction. + */ +asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res); + +#endif /*__LINUX_ARM_SMCCC_H*/ -- cgit v1.2.3 From 80d2b4c95b2c4e5b8c143703150ab67b8a198e63 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 1 Feb 2016 18:01:30 +0100 Subject: ARM: 8511/1: ARM64: kernel: PSCI: move PSCI idle management code to drivers/firmware ARM64 PSCI kernel interfaces that initialize idle states and implement the suspend API to enter them are generic and can be shared with the ARM architecture. To achieve that goal, this patch moves ARM64 PSCI idle management code to drivers/firmware, so that the interface to initialize and enter idle states can actually be shared by ARM and ARM64 arches back-ends. The ARM generic CPUidle implementation also requires the definition of a cpuidle_ops section entry for the kernel to initialize the CPUidle operations at boot based on the enable-method (ie ARM64 has the statically initialized cpu_ops counterparts for that purpose); therefore this patch also adds the required section entry on CONFIG_ARM for PSCI so that the kernel can initialize the PSCI CPUidle back-end when PSCI is the probed enable-method. On ARM64 this patch provides no functional change. Signed-off-by: Lorenzo Pieralisi Acked-by: Daniel Lezcano Acked-by: Catalin Marinas [arch/arm64] Acked-by: Mark Rutland Tested-by: Jisheng Zhang Cc: Will Deacon Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Catalin Marinas Cc: Mark Rutland Cc: Jisheng Zhang Signed-off-by: Russell King (cherry picked from commit 8b6f2499ac45d5a0ab2e4b6f9613ab3f60416be1) Signed-off-by: Alex Shi --- include/linux/psci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index 12c4865457ad..393efe2edf9a 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -24,6 +24,9 @@ bool psci_tos_resident_on(int cpu); bool psci_power_state_loses_context(u32 state); bool psci_power_state_is_valid(u32 state); +int psci_cpu_init_idle(unsigned int cpu); +int psci_cpu_suspend_enter(unsigned long index); + struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); int (*cpu_off)(u32 state); -- cgit v1.2.3 From b3b4283f7e46f563b438e587a0a567118a45a3fd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: move permission checking into notify_change() for utimes(NULL) commit f2b20f6ee842313a0d681dbbf7f87b70291a6a3b upstream. This fixes a bug where the permission was not properly checked in overlayfs. The testcase is ltp/utimensat01. It is also cleaner and safer to do the permission checking in the vfs helper instead of the caller. This patch introduces an additional ia_valid flag ATTR_TOUCH (since touch(1) is the most obvious user of utimes(NULL)) that is passed into notify_change whenever the conditions for this special permission checking mode are met. Reported-by: Aihua Zhang Signed-off-by: Miklos Szeredi Tested-by: Aihua Zhang Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0166582c4d78..e1a123760dbf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -226,6 +226,7 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) +#define ATTR_TOUCH (1 << 17) /* * Whiteout is represented by a char device. The following constants define the -- cgit v1.2.3 From 6ddbd662d0bac4e4cb998b4c1fa44cd7c679c3cb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 6 Oct 2016 15:53:38 -0700 Subject: CHROMIUM: remove Android's cgroup generic permissions checks The implementation is utterly broken, resulting in all processes being allows to move tasks between sets (as long as they have access to the "tasks" attribute), and upstream is heading towards checking only capability anyway, so let's get rid of this code. BUG=b:31790445,chromium:647994 TEST=Boot android container, examine logcat Change-Id: I2f780a5992c34e52a8f2d0b3557fc9d490da2779 Signed-off-by: Dmitry Torokhov Reviewed-on: https://chromium-review.googlesource.com/394967 Reviewed-by: Ricky Zhou Reviewed-by: John Stultz --- include/linux/cgroup-defs.h | 1 - include/linux/cgroup.h | 14 -------------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 788c7c49a673..8da263299754 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -431,7 +431,6 @@ struct cgroup_subsys { void (*css_reset)(struct cgroup_subsys_state *css); void (*css_e_css_changed)(struct cgroup_subsys_state *css); - int (*allow_attach)(struct cgroup_taskset *tset); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 70358b9f5a7a..cb91b44f5f78 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -528,16 +528,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -/* - * Default Android check for whether the current process is allowed to move a - * task across cgroups, either because CAP_SYS_NICE is set or because the uid - * of the calling process is the same as the moved task or because we are - * running as root. - * Returns 0 if this is allowed, or -EACCES otherwise. - */ -int subsys_cgroup_allow_attach(struct cgroup_taskset *tset); - - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -562,10 +552,6 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } -static inline int subsys_cgroup_allow_attach(void *tset) -{ - return -EINVAL; -} #endif /* !CONFIG_CGROUPS */ #endif /* _LINUX_CGROUP_H */ -- cgit v1.2.3 From 8bfd0c41d2ed123729d30768dde2a189dc6fd181 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Thu, 4 Aug 2016 19:32:33 +0900 Subject: PM / devfreq: event: remove duplicate devfreq_event_get_drvdata() commit c8a9a6daccad495c48d5435d3487956ce01bc6a1 upstream. there define two devfreq_event_get_drvdata() function in devfreq-event.h when disable CONFIG_PM_DEVFREQ_EVENT, it will lead to build fail. So remove devfreq_event_get_drvdata() function. Fixes: f262f28c1470 ("PM / devfreq: event: Add devfreq_event class") Signed-off-by: Lin Huang Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- include/linux/devfreq-event.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index 0a83a1e648b0..4db00b02ca3f 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -148,11 +148,6 @@ static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) return -EINVAL; } -static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) -{ - return ERR_PTR(-EINVAL); -} - static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( struct device *dev, int index) { -- cgit v1.2.3 From e0d61779d3fe56bf139c6e4c8381d9dd3eddc1f0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:50 -0700 Subject: lib: move strtobool() to kstrtobool() commit ef951599074ba4fad2d0efa0a977129b41e6d203 upstream. Create the kstrtobool_from_user() helper and move strtobool() logic into the new kstrtobool() (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool() callers. Signed-off-by: Kees Cook Cc: Joe Perches Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Daniel Borkmann Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Kalle Valo Cc: Steve French Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 2 ++ include/linux/string.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e571e592e53a..50220cab738c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -356,6 +356,7 @@ int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); @@ -367,6 +368,7 @@ int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigne int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { diff --git a/include/linux/string.h b/include/linux/string.h index 9ef7795e65e4..aa30789b0f65 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -127,7 +127,11 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int strtobool(const char *s, bool *res); +extern int kstrtobool(const char *s, bool *res); +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -- cgit v1.2.3 From f6031d95320dc2930f1b813e98533a01c92f3dc0 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 11 Oct 2016 13:54:50 -0700 Subject: ipc/sem.c: fix complex_count vs. simple op race commit 5864a2fd3088db73d47942370d0f7210a807b9bc upstream. Commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") introduced a race: sem_lock has a fast path that allows parallel simple operations. There are two reasons why a simple operation cannot run in parallel: - a non-simple operations is ongoing (sma->sem_perm.lock held) - a complex operation is sleeping (sma->complex_count != 0) As both facts are stored independently, a thread can bypass the current checks by sleeping in the right positions. See below for more details (or kernel bugzilla 105651). The patch fixes that by creating one variable (complex_mode) that tracks both reasons why parallel operations are not possible. The patch also updates stale documentation regarding the locking. With regards to stable kernels: The patch is required for all kernels that include the commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") (3.10?) The alternative is to revert the patch that introduced the race. The patch is safe for backporting, i.e. it makes no assumptions about memory barriers in spin_unlock_wait(). Background: Here is the race of the current implementation: Thread A: (simple op) - does the first "sma->complex_count == 0" test Thread B: (complex op) - does sem_lock(): This includes an array scan. But the scan can't find Thread A, because Thread A does not own sem->lock yet. - the thread does the operation, increases complex_count, drops sem_lock, sleeps Thread A: - spin_lock(&sem->lock), spin_is_locked(sma->sem_perm.lock) - sleeps before the complex_count test Thread C: (complex op) - does sem_lock (no array scan, complex_count==1) - wakes up Thread B. - decrements complex_count Thread A: - does the complex_count test Bug: Now both thread A and thread C operate on the same array, without any synchronization. Fixes: 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") Link: http://lkml.kernel.org/r/1469123695-5661-1-git-send-email-manfred@colorfullife.com Reported-by: Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Davidlohr Bueso Cc: Thomas Gleixner Cc: Ingo Molnar Cc: <1vier1@web.de> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/sem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 976ce3a19f1b..d0efd6e6c20a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,6 +21,7 @@ struct sem_array { struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ + bool complex_mode; /* no parallel simple ops */ }; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From f2e81c026017222c18b178f175adf415c276b131 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Jan 2016 13:04:11 -0700 Subject: lightnvm: ensure that nvm_dev_ops can be used without CONFIG_NVM commit a7fd9a4f3e8179bab31e4637236ebb0e0b7867c6 upstream. null_blk defines an empty version of this ops structure if CONFIG_NVM isn't set, but it doesn't know the type. Move those bits out of the protection of CONFIG_NVM in the main lightnvm include. Signed-off-by: Jens Axboe [pebolle: backport to v4.4] Signed-off-by: Paul Bolle Signed-off-by: Greg Kroah-Hartman --- include/linux/lightnvm.h | 121 +++++++++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f09648d14694..782d4e814e21 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -1,6 +1,8 @@ #ifndef NVM_H #define NVM_H +#include + enum { NVM_IO_OK = 0, NVM_IO_REQUEUE = 1, @@ -11,10 +13,71 @@ enum { NVM_IOTYPE_GC = 1, }; +#define NVM_BLK_BITS (16) +#define NVM_PG_BITS (16) +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (8) +#define NVM_LUN_BITS (8) +#define NVM_CH_BITS (8) + +struct ppa_addr { + /* Generic structure for all addresses */ + union { + struct { + u64 blk : NVM_BLK_BITS; + u64 pg : NVM_PG_BITS; + u64 sec : NVM_SEC_BITS; + u64 pl : NVM_PL_BITS; + u64 lun : NVM_LUN_BITS; + u64 ch : NVM_CH_BITS; + } g; + + u64 ppa; + }; +}; + +struct nvm_rq; +struct nvm_id; +struct nvm_dev; + +typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); +typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); +typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, + nvm_l2p_update_fn *, void *); +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, + nvm_bb_update_fn *, void *); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); +typedef void (nvm_destroy_dma_pool_fn)(void *); +typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, + dma_addr_t *); +typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); + +struct nvm_dev_ops { + nvm_id_fn *identity; + nvm_get_l2p_tbl_fn *get_l2p_tbl; + nvm_op_bb_tbl_fn *get_bb_tbl; + nvm_op_set_bb_fn *set_bb_tbl; + + nvm_submit_io_fn *submit_io; + nvm_erase_blk_fn *erase_block; + + nvm_create_dma_pool_fn *create_dma_pool; + nvm_destroy_dma_pool_fn *destroy_dma_pool; + nvm_dev_dma_alloc_fn *dev_dma_alloc; + nvm_dev_dma_free_fn *dev_dma_free; + + unsigned int max_phys_sect; +}; + + + #ifdef CONFIG_NVM #include -#include #include #include @@ -126,29 +189,6 @@ struct nvm_tgt_instance { #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 -#define NVM_BLK_BITS (16) -#define NVM_PG_BITS (16) -#define NVM_SEC_BITS (8) -#define NVM_PL_BITS (8) -#define NVM_LUN_BITS (8) -#define NVM_CH_BITS (8) - -struct ppa_addr { - /* Generic structure for all addresses */ - union { - struct { - u64 blk : NVM_BLK_BITS; - u64 pg : NVM_PG_BITS; - u64 sec : NVM_SEC_BITS; - u64 pl : NVM_PL_BITS; - u64 lun : NVM_LUN_BITS; - u64 ch : NVM_CH_BITS; - } g; - - u64 ppa; - }; -}; - struct nvm_rq { struct nvm_tgt_instance *ins; struct nvm_dev *dev; @@ -182,39 +222,6 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) struct nvm_block; -typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); -typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); -typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, - nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, - nvm_bb_update_fn *, void *); -typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); -typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); -typedef void (nvm_destroy_dma_pool_fn)(void *); -typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, - dma_addr_t *); -typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); - -struct nvm_dev_ops { - nvm_id_fn *identity; - nvm_get_l2p_tbl_fn *get_l2p_tbl; - nvm_op_bb_tbl_fn *get_bb_tbl; - nvm_op_set_bb_fn *set_bb_tbl; - - nvm_submit_io_fn *submit_io; - nvm_erase_blk_fn *erase_block; - - nvm_create_dma_pool_fn *create_dma_pool; - nvm_destroy_dma_pool_fn *destroy_dma_pool; - nvm_dev_dma_alloc_fn *dev_dma_alloc; - nvm_dev_dma_free_fn *dev_dma_free; - - unsigned int max_phys_sect; -}; - struct nvm_lun { int id; -- cgit v1.2.3 From f84311d7cd04cb1da9f0192417a584543be879a3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 30 Sep 2016 15:11:29 -0700 Subject: mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page() commit 22f2ac51b6d643666f4db093f13144f773ff3f3a upstream. Antonio reports the following crash when using fuse under memory pressure: kernel BUG at /build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic #55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing ways how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node. [mhocko@suse.com: backport for 4.4] Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/20160919155822.29498-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Antonio SJ Musumeci Debugged-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7ba7dccaf0e7..b28de19aadbf 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -266,6 +266,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_pages(node)); node->count--; } @@ -281,6 +282,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } -- cgit v1.2.3 From e765b192093d3b7fc8899bd33b0867492a405ba0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Oct 2016 21:03:48 -0700 Subject: Using BUG_ON() as an assert() is _never_ acceptable commit 21f54ddae449f4bdd9f1498124901d67202243d9 upstream. That just generally kills the machine, and makes debugging only much harder, since the traces may long be gone. Debugging by assert() is a disease. Don't do it. If you can continue, you're much better off doing so with a live machine where you have a much higher chance that the report actually makes it to the system logs, rather than result in a machine that is just completely dead. The only valid situation for BUG_ON() is when continuing is not an option, because there is massive corruption. But if you are just verifying that something is true, you warn about your broken assumptions (preferably just once), and limp on. Fixes: 22f2ac51b6d6 ("mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page()") Cc: Johannes Weiner Cc: Miklos Szeredi Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b28de19aadbf..d8ca2eaa3a8b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -266,7 +266,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { - VM_BUG_ON(!workingset_node_pages(node)); + VM_WARN_ON_ONCE(!workingset_node_pages(node)); node->count--; } @@ -282,7 +282,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { - VM_BUG_ON(!workingset_node_shadows(node)); + VM_WARN_ON_ONCE(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } -- cgit v1.2.3 From 76fd38eae33765b92aefa73aaf6cca3f22a3faa3 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 17 Oct 2016 16:00:46 +0100 Subject: irqchip/gic-v3-its: Fix entry size mask for GITS_BASER commit 9224eb77e63f70f16c0b6b7a20ca7d395f3bc077 upstream. Entry Size in GITS_BASER occupies 5 bits [52:48], but we mask out 8 bits. Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index e98425058f20..54048f336a1f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -218,7 +218,7 @@ #define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) -#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_NonShareable (0UL << 10) #define GITS_BASER_InnerShareable (1UL << 10) #define GITS_BASER_OuterShareable (2UL << 10) -- cgit v1.2.3 From 57c9cfdb61ea270936fab76da99a742c6ef0b86f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 17:39:09 +0200 Subject: posix_acl: Clear SGID bit when setting file permissions commit 073931017b49d9458aa351605b43a7e34598caef upstream. When file permissions are modified via chmod(2) and the user is not in the owning group or capable of CAP_FSETID, the setgid bit is cleared in inode_change_ok(). Setting a POSIX ACL via setxattr(2) sets the file permissions as well as the new ACL, but doesn't clear the setgid bit in a similar way; this allows to bypass the check in chmod(2). Fix that. References: CVE-2016-7097 Reviewed-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- include/linux/posix_acl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 3e96a6a76103..d1a8ad7e5ae4 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -95,6 +95,7 @@ extern int set_posix_acl(struct inode *, int, struct posix_acl *); extern int posix_acl_chmod(struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); +extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **); extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); -- cgit v1.2.3 From 5699b3431e0b14736867484b8669ead2d40f575e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:01 -0700 Subject: tunnels: Don't apply GRO to multiple layers of encapsulation. commit fac8e0f579695a3ecbc4d3cac369139d7f819971 upstream. When drivers express support for TSO of encapsulated packets, they only mean that they can do it for one layer of encapsulation. Supporting additional levels would mean updating, at a minimum, more IP length fields and they are unaware of this. No encapsulation device expresses support for handling offloaded encapsulated packets, so we won't generate these types of frames in the transmit path. However, GRO doesn't have a check for multiple levels of encapsulation and will attempt to build them. UDP tunnel GRO actually does prevent this situation but it only handles multiple UDP tunnels stacked on top of each other. This generalizes that solution to prevent any kind of tunnel stacking that would cause problems. Fixes: bf5a755f ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller Signed-off-by: Juerg Haefliger Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4e9c75226f07..12b4d54a8ffa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1986,8 +1986,8 @@ struct napi_gro_cb { /* This is non-zero if the packet may be of the same flow. */ u8 same_flow:1; - /* Used in udp_gro_receive */ - u8 udp_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; -- cgit v1.2.3 From 4b06152a4822ef10a2b695d6fc75303ad657096b Mon Sep 17 00:00:00 2001 From: David Hsu Date: Tue, 9 Aug 2016 14:57:46 -0700 Subject: pwm: Unexport children before chip removal commit 0733424c9ba9f42242409d1ece780777272f7ea1 upstream. Exported pwm channels aren't removed before the pwmchip and are leaked. This results in invalid sysfs files. This fix removes all exported pwm channels before chip removal. Signed-off-by: David Hsu Fixes: 76abbdde2d95 ("pwm: Add sysfs interface") Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index cfc3ed46cad2..aa8736d5b2f3 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -331,6 +331,7 @@ static inline void pwm_remove_table(struct pwm_lookup *table, size_t num) #ifdef CONFIG_PWM_SYSFS void pwmchip_sysfs_export(struct pwm_chip *chip); void pwmchip_sysfs_unexport(struct pwm_chip *chip); +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip); #else static inline void pwmchip_sysfs_export(struct pwm_chip *chip) { @@ -339,6 +340,10 @@ static inline void pwmchip_sysfs_export(struct pwm_chip *chip) static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip) { } + +static inline void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ +} #endif /* CONFIG_PWM_SYSFS */ #endif /* __LINUX_PWM_H */ -- cgit v1.2.3 From 6eb0061fa630ae97c733a4dcbe3e23333ebe8626 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 25 Sep 2016 23:08:31 +0200 Subject: ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route [ Upstream commit 2cf750704bb6d7ed8c7d732e071dd1bc890ea5e8 ] Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid instead of the previous dst_pid which was copied from in_skb's portid. Since the skb is new the portid is 0 at that point so the packets are sent to the kernel and we get scheduling while atomic or a deadlock (depending on where it happens) by trying to acquire rtnl two times. Also since this is RTM_GETROUTE, it can be triggered by a normal user. Here's the sleeping while atomic trace: [ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 7858.212881] 2 locks held by swapper/0/0: [ 7858.213013] #0: (((&mrt->ipmr_expire_timer))){+.-...}, at: [] call_timer_fn+0x5/0x350 [ 7858.213422] #1: (mfc_unres_lock){+.....}, at: [] ipmr_expire_process+0x25/0x130 [ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179 [ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 7858.214108] 0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000 [ 7858.214412] ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e [ 7858.214716] 000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f [ 7858.215251] Call Trace: [ 7858.215412] [] dump_stack+0x85/0xc1 [ 7858.215662] [] ___might_sleep+0x192/0x250 [ 7858.215868] [] __might_sleep+0x6f/0x100 [ 7858.216072] [] mutex_lock_nested+0x33/0x4d0 [ 7858.216279] [] ? netlink_lookup+0x25f/0x460 [ 7858.216487] [] rtnetlink_rcv+0x1b/0x40 [ 7858.216687] [] netlink_unicast+0x19c/0x260 [ 7858.216900] [] rtnl_unicast+0x20/0x30 [ 7858.217128] [] ipmr_destroy_unres+0xa9/0xf0 [ 7858.217351] [] ipmr_expire_process+0x8f/0x130 [ 7858.217581] [] ? ipmr_net_init+0x180/0x180 [ 7858.217785] [] ? ipmr_net_init+0x180/0x180 [ 7858.217990] [] call_timer_fn+0xa5/0x350 [ 7858.218192] [] ? call_timer_fn+0x5/0x350 [ 7858.218415] [] ? ipmr_net_init+0x180/0x180 [ 7858.218656] [] run_timer_softirq+0x260/0x640 [ 7858.218865] [] ? __do_softirq+0xbb/0x54f [ 7858.219068] [] __do_softirq+0xe8/0x54f [ 7858.219269] [] irq_exit+0xb8/0xc0 [ 7858.219463] [] smp_apic_timer_interrupt+0x42/0x50 [ 7858.219678] [] apic_timer_interrupt+0x8c/0xa0 [ 7858.219897] [] ? native_safe_halt+0x6/0x10 [ 7858.220165] [] ? trace_hardirqs_on+0xd/0x10 [ 7858.220373] [] default_idle+0x23/0x190 [ 7858.220574] [] arch_cpu_idle+0xf/0x20 [ 7858.220790] [] default_idle_call+0x4c/0x60 [ 7858.221016] [] cpu_startup_entry+0x39b/0x4d0 [ 7858.221257] [] rest_init+0x135/0x140 [ 7858.221469] [] start_kernel+0x50e/0x51b [ 7858.221670] [] ? early_idt_handler_array+0x120/0x120 [ 7858.221894] [] x86_64_start_reservations+0x2a/0x2c [ 7858.222113] [] x86_64_start_kernel+0x13b/0x14a Fixes: 2942e9005056 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 79aaa9fc1a15..d5277fc3ce2e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -103,5 +103,5 @@ struct mfc_cache { struct rtmsg; extern int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 66982e764051..f831155dc7d1 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -115,7 +115,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); -- cgit v1.2.3 From 3cb00b90e8b1bd59382f5e1304dd751f9674f027 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 20 Oct 2016 15:58:02 +0200 Subject: net: add recursion limit to GRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fcd91dd449867c6bfe56a81cabba76b829fd05cd ] Currently, GRO can do unlimited recursion through the gro_receive handlers. This was fixed for tunneling protocols by limiting tunnel GRO to one level with encap_mark, but both VLAN and TEB still have this problem. Thus, the kernel is vulnerable to a stack overflow, if we receive a packet composed entirely of VLAN headers. This patch adds a recursion counter to the GRO layer to prevent stack overflow. When a gro_receive function hits the recursion limit, GRO is aborted for this skb and it is processed normally. This recursion counter is put in the GRO CB, but could be turned into a percpu counter if we run out of space in the CB. Thanks to Vladimír Beneš for the initial bug report. Fixes: CVE-2016-7039 Fixes: 9b174d88c257 ("net: Add Transparent Ethernet Bridging GRO support.") Fixes: 66e5133f19e9 ("vlan: Add GRO support for non hardware accelerated vlan") Signed-off-by: Sabrina Dubroca Reviewed-by: Jiri Benc Acked-by: Hannes Frederic Sowa Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12b4d54a8ffa..9d6025703f73 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2003,7 +2003,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 3 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2014,6 +2017,25 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ @@ -2059,6 +2081,22 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; +typedef struct sk_buff **(*gro_receive_udp_t)(struct sk_buff **, + struct sk_buff *, + struct udp_offload *); +static inline struct sk_buff **call_gro_receive_udp(gro_receive_udp_t cb, + struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb, uoff); +} + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; -- cgit v1.2.3 From 225a24ae97331f3b9d97c1bb97b1e30b3633bcf4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: tcp: take care of truncations done by sk_filter() [ Upstream commit ac6e780070e30e4c35bd395acfe9191e6268bdd3 ] With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/filter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5110d4211866..ccb98b459c59 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -421,7 +421,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) } #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ -int sk_filter(struct sock *sk, struct sk_buff *skb); +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) +{ + return sk_filter_trim_cap(sk, skb, 1); +} int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -- cgit v1.2.3 From aaaf9e59c00aa2ff9c2bd1da05c0cf1ba2a9634a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:20:15 -0800 Subject: mm: page_alloc: generalize the dirty balance reserve The dirty balance reserve that dirty throttling has to consider is merely memory not available to userspace allocations. There is nothing writeback-specific about it. Generalize the name so that it's reusable outside of that context. Signed-off-by: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a8d0143730d7b42c9fe6d1435d92ecce6863a62a) Signed-off-by: Alex Shi --- include/linux/mmzone.h | 6 +++--- include/linux/swap.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e23a9e704536..9134ae3f61ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -361,10 +361,10 @@ struct zone { struct per_cpu_pageset __percpu *pageset; /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. + * This is a per-zone reserve of pages that are not available + * to userspace allocations. */ - unsigned long dirty_balance_reserve; + unsigned long totalreserve_pages; #ifndef CONFIG_SPARSEMEM /* diff --git a/include/linux/swap.h b/include/linux/swap.h index d8ca2eaa3a8b..f1a52c11de0e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -289,7 +289,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node) /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern unsigned long dirty_balance_reserve; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); -- cgit v1.2.3 From cd5367ae02a488450b714a5d5f47afb014ea6541 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Nov 2015 11:13:34 -0500 Subject: cgroup: replace __DEVEL__sane_behavior with cgroup2 fs type With major controllers - cpu, memory and io - shaping up for the unified hierarchy, cgroup2 is about ready to be, gradually, released into the wild. Replace __DEVEL__sane_behavior flag which was used to select the unified hierarchy with a separate filesystem type "cgroup2" so that unified hierarchy can be mounted as follows. mount -t cgroup2 none $MOUNT_POINT The cgroup2 fs has its own magic number - 0x63677270 ("cgrp"). v2: Assign a different magic number to cgroup2 fs. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner (cherry picked from commit 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff) Signed-off-by: Alex Shi --- include/linux/cgroup-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8da263299754..4cd5c95d1ca0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -66,7 +66,6 @@ enum { /* cgroup_root->flags */ enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; -- cgit v1.2.3 From bcddfb47bf5d54c3b312e165374bed0317d5a767 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:22 -0800 Subject: UPSTREAM: timekeeping: Add a fast and NMI safe boot clock This boot clock can be used as a tracing clock and will account for suspend time. To keep it NMI safe since we're accessing from tracing, we're not using a separate timekeeper with updates to monotonic clock and boot offset protected with seqlocks. This has the following minor side effects: (1) Its possible that a timestamp be taken after the boot offset is updated but before the timekeeper is updated. If this happens, the new boot offset is added to the old timekeeping making the clock appear to update slightly earlier: CPU 0 CPU 1 timekeeping_inject_sleeptime64() __timekeeping_inject_sleeptime(tk, delta); timestamp(); timekeeping_update(tk, TK_CLEAR_NTP...); (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be partially updated. Since the tk->offs_boot update is a rare event, this should be a rare occurrence which postprocessing should be able to handle. Bug: b/33184060 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..b7246d2ed7c9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -233,6 +233,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones -- cgit v1.2.3 From d88a1bd00cfa676163853ed6017922abd0cb3f39 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Sep 2016 10:49:11 +0800 Subject: iommu/vt-d: Fix PASID table allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 910170442944e1f8674fd5ddbeeb8ccd1877ea98 upstream. Somehow I ended up with an off-by-three error in calculating the size of the PASID and PASID State tables, which triggers allocations failures as those tables unfortunately have to be physically contiguous. In fact, even the *correct* maximum size of 8MiB is problematic and is wont to lead to allocation failures. Since I have extracted a promise that this *will* be fixed in hardware, I'm happy to limit it on the current hardware to a maximum of 0x20000 PASIDs, which gives us 1MiB tables — still not ideal, but better than before. Reported by Mika Kuoppala and also by Xunlei Pang who submitted a simpler patch to fix only the allocation (and not the free) to the "correct" limit... which was still problematic. Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2d9b650047a5..d49e26c6cdc7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -429,6 +429,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ -- cgit v1.2.3 From 995761627d97cc374f14203589b3a6385019bf0b Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 30 Nov 2016 15:54:13 -0800 Subject: kasan: update kasan_global for gcc 7 commit 045d599a286bc01daa3510d59272440a17b23c2e upstream. kasan_global struct is part of compiler/runtime ABI. gcc revision 241983 has added a new field to kasan_global struct. Update kernel definition of kasan_global struct to include the new field. Without this patch KASAN is broken with gcc 7. Link: http://lkml.kernel.org/r/1479219743-28682-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index eeae401a2412..287e698c28de 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -251,7 +251,9 @@ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ -#if GCC_VERSION >= 50000 +#if GCC_VERSION >= 70000 +#define KASAN_ABI_VERSION 5 +#elif GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 #elif GCC_VERSION >= 40902 #define KASAN_ABI_VERSION 3 -- cgit v1.2.3 From 140ff0a348970430e4eb06a63afbbe5510f9c8d1 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 2 Nov 2016 16:35:51 -0600 Subject: PCI: Export pcie_find_root_port commit e784930bd645e7df78c66e7872fec282b0620075 upstream. Export pcie_find_root_port() so we can use it outside of PCIe-AER error injection. Signed-off-by: Johannes Thumshirn Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e89c7ee7e803..5f37614f2451 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1802,6 +1802,20 @@ static inline int pci_pcie_type(const struct pci_dev *dev) return (pcie_caps_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } +static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) +{ + while (1) { + if (!pci_is_pcie(dev)) + break; + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + if (!dev->bus->self) + break; + dev = dev->bus->self; + } + return NULL; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, -- cgit v1.2.3 From fd1aa12c63400ca7d5b6d189ae98570f47735180 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Oct 2016 13:57:37 -0400 Subject: constify iov_iter_count() and iter_is_iovec() commit b57332b4105abf1d518d93886e547ee2f98cd414 upstream. [stable note, need this to prevent build warning in commit a0ac402cfcdc904f9772e1762b3fda112dcc56a0] Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 5f9c59da978b..e2225109b816 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -101,12 +101,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); -static inline size_t iov_iter_count(struct iov_iter *i) +static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } -static inline bool iter_is_iovec(struct iov_iter *i) +static inline bool iter_is_iovec(const struct iov_iter *i) { return !(i->type & (ITER_BVEC | ITER_KVEC)); } -- cgit v1.2.3