From 2f59c12b47c187c8c221f11ee139365f9bb2b334 Mon Sep 17 00:00:00 2001 From: Chen Jie Date: Fri, 15 Mar 2019 03:44:38 +0000 Subject: futex: Ensure that futex address is aligned in handle_futex_death() commit 5a07168d8d89b00fe1760120714378175b3ef992 upstream. The futex code requires that the user space addresses of futexes are 32bit aligned. sys_futex() checks this in futex_get_keys() but the robust list code has no alignment check in place. As a consequence the kernel crashes on architectures with strict alignment requirements in handle_futex_death() when trying to cmpxchg() on an unaligned futex address which was retrieved from the robust list. [ tglx: Rewrote changelog, proper sizeof() based alignement check and add comment ] Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core") Signed-off-by: Chen Jie Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1552621478-119787-1-git-send-email-chenjie6@huawei.com Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 0c92c8d34ffa..ec9df5ba040b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3067,6 +3067,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; + retry: if (get_user(uval, uaddr)) return -1; -- cgit v1.2.3 From 4aada79c6793c59e484b69fd4ed591396e2d4b39 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 9 Jan 2019 23:03:25 -0500 Subject: locking/lockdep: Add debug_locks check in __lock_downgrade() commit 71492580571467fb7177aade19c18ce7486267f5 upstream. Tetsuo Handa had reported he saw an incorrect "downgrading a read lock" warning right after a previous lockdep warning. It is likely that the previous warning turned off lock debugging causing the lockdep to have inconsistency states leading to the lock downgrade warning. Fix that by add a check for debug_locks at the beginning of __lock_downgrade(). Debugged-by: Tetsuo Handa Reported-by: Tetsuo Handa Reported-by: syzbot+53383ae265fb161ef488@syzkaller.appspotmail.com Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/1547093005-26085-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 774ab79d3ec7..a49c565529a0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3314,6 +3314,9 @@ __lock_set_class(struct lockdep_map *lock, const char *name, unsigned int depth; int i; + if (unlikely(!debug_locks)) + return 0; + depth = curr->lockdep_depth; /* * This function is about (re)setting the class of a held lock, -- cgit v1.2.3 From e43196819cc3d2ac044f1669741f440397b82692 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Thu, 17 Dec 2015 07:34:27 +0800 Subject: sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task() [ Upstream commit 0905f04eb21fc1c2e690bed5d0418a061d56c225 ] If a newly created task is selected to go to a different CPU in fork balance when it wakes up the first time, its load averages should not be removed from the source CPU since they are never added to it before. The same is also applicable to a never used group entity. Fix it in remove_entity_load_avg(): when entity's last_update_time is 0, simply return. This should precisely identify the case in question, because in other migrations, the last_update_time is set to 0 after remove_entity_load_avg(). Reported-by: Steve Muckle Signed-off-by: Yuyang Du [peterz: cfs_rq_last_update_time] Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Guittot Link: http://lkml.kernel.org/r/20151216233427.GJ28098@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2af250547bb..6051007918ad 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2841,27 +2841,45 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0); } -/* - * Task first catches up with cfs_rq, and then subtract - * itself from the cfs_rq (task must be off the queue now). - */ -void remove_entity_load_avg(struct sched_entity *se) -{ - struct cfs_rq *cfs_rq = cfs_rq_of(se); - u64 last_update_time; - #ifndef CONFIG_64BIT +static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) +{ u64 last_update_time_copy; + u64 last_update_time; do { last_update_time_copy = cfs_rq->load_last_update_time_copy; smp_rmb(); last_update_time = cfs_rq->avg.last_update_time; } while (last_update_time != last_update_time_copy); + + return last_update_time; +} #else - last_update_time = cfs_rq->avg.last_update_time; +static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) +{ + return cfs_rq->avg.last_update_time; +} #endif +/* + * Task first catches up with cfs_rq, and then subtract + * itself from the cfs_rq (task must be off the queue now). + */ +void remove_entity_load_avg(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 last_update_time; + + /* + * Newly created task or never used group entity should not be removed + * from its (source) cfs_rq + */ + if (se->avg.last_update_time == 0) + return; + + last_update_time = cfs_rq_last_update_time(cfs_rq); + __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); -- cgit v1.2.3 From bdf3c006b9a2308f481ca070d3a85baf1bc5d48f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 14 Jan 2016 15:21:40 -0800 Subject: vmstat: make vmstat_updater deferrable again and shut down on idle [ Upstream commit 0eb77e9880321915322d42913c3b53241739c8aa ] Currently the vmstat updater is not deferrable as a result of commit ba4877b9ca51 ("vmstat: do not use deferrable delayed work for vmstat_update"). This in turn can cause multiple interruptions of the applications because the vmstat updater may run at Make vmstate_update deferrable again and provide a function that folds the differentials when the processor is going to idle mode thus addressing the issue of the above commit in a clean way. Note that the shepherd thread will continue scanning the differentials from another processor and will reenable the vmstat workers if it detects any changes. Fixes: ba4877b9ca51 ("vmstat: do not use deferrable delayed work for vmstat_update") Signed-off-by: Christoph Lameter Cc: Michal Hocko Cc: Johannes Weiner Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sched/idle.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index bfd573122e0d..306a859b36f0 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -219,6 +219,7 @@ static void cpu_idle_loop(void) */ __current_set_polling(); + quiet_vmstat(); tick_nohz_idle_enter(); while (!need_resched()) { -- cgit v1.2.3 From 6f311381f318a1c756075848784cd39cf32fa6e3 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 19 Jan 2016 17:14:29 +0200 Subject: perf: Synchronously free aux pages in case of allocation failure [ Upstream commit 45c815f06b80031659c63d7b93e580015d6024dd ] We are currently using asynchronous deallocation in the error path in AUX mmap code, which is unnecessary and also presents a problem for users that wish to probe for the biggest possible buffer size they can get: they'll get -EINVAL on all subsequent attemts to allocate a smaller buffer before the asynchronous deallocation callback frees up the pages from the previous unsuccessful attempt. Currently, gdb does that for allocating AUX buffers for Intel PT traces. More specifically, overwrite mode of AUX pmus that don't support hardware sg (some implementations of Intel PT, for instance) is limited to only one contiguous high order allocation for its buffer and there is no way of knowing its size without trying. This patch changes error path freeing to be synchronous as there won't be any contenders for the AUX pages at that point. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1453216469-9509-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/ring_buffer.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 358bb53c1e74..94dc6b0763ab 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -468,6 +468,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx) __free_page(page); } +static void __rb_free_aux(struct ring_buffer *rb) +{ + int pg; + + if (rb->aux_priv) { + rb->free_aux(rb->aux_priv); + rb->free_aux = NULL; + rb->aux_priv = NULL; + } + + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); + + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } +} + int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, pgoff_t pgoff, int nr_pages, long watermark, int flags) { @@ -556,30 +575,11 @@ out: if (!ret) rb->aux_pgoff = pgoff; else - rb_free_aux(rb); + __rb_free_aux(rb); return ret; } -static void __rb_free_aux(struct ring_buffer *rb) -{ - int pg; - - if (rb->aux_priv) { - rb->free_aux(rb->aux_priv); - rb->free_aux = NULL; - rb->aux_priv = NULL; - } - - if (rb->aux_nr_pages) { - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); - - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; - } -} - void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) -- cgit v1.2.3 From 2ba2cca736d852cf0ec2fcd0b3e7148a3882adaa Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 4 Mar 2016 15:42:45 +0200 Subject: perf/ring_buffer: Refuse to begin AUX transaction after rb->aux_mmap_count drops [ Upstream commit dcb10a967ce82d5ad20570693091139ae716ff76 ] When ring buffer's AUX area is unmapped and rb->aux_mmap_count drops to zero, new AUX transactions into this buffer can still be started, even though the buffer in en route to deallocation. This patch adds a check to perf_aux_output_begin() for rb->aux_mmap_count being zero, in which case there is no point starting new transactions, in other words, the ring buffers that pass a certain point in perf_mmap_close will not have their events sending new data, which clears path for freeing those buffers' pages right there and then, provided that no active transactions are holding the AUX reference. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1457098969-21595-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/ring_buffer.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 94dc6b0763ab..7324d83d6bd8 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -288,6 +288,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) goto err; + /* + * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), + * the aux buffer is in perf_mmap_close(), about to get freed. + */ + if (!atomic_read(&rb->aux_mmap_count)) + goto err; + /* * Nesting is not supported for AUX area, make sure nested * writers are caught early -- cgit v1.2.3 From fbfaa26fa9e77f4b8b141f650340195f23cd7029 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:11 +0100 Subject: PM / Hibernate: Call flush_icache_range() on pages restored in-place [ Upstream commit f6cf0545ec697ddc278b7457b7d0c0d86a2ea88e ] Some architectures require code written to memory as if it were data to be 'cleaned' from any data caches before the processor can fetch them as new instructions. During resume from hibernate, the snapshot code copies some pages directly, meaning these architectures do not get a chance to perform their cache maintenance. Modify the read and decompress code to call flush_icache_range() on all pages that are restored, so that the restored in-place pages are guaranteed to be executable on these architectures. Signed-off-by: James Morse Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Acked-by: Catalin Marinas [will: make clean_pages_on_* static and remove initialisers] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- kernel/power/swap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 12cd989dadf6..160e1006640d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -36,6 +36,14 @@ #define HIBERNATE_SIG "S1SUSPEND" +/* + * When reading an {un,}compressed image, we may restore pages in place, + * in which case some architectures need these pages cleaning before they + * can be executed. We don't know which pages these may be, so clean the lot. + */ +static bool clean_pages_on_read; +static bool clean_pages_on_decompress; + /* * The swap map is a data structure used for keeping track of each page * written to a swap partition. It consists of many swap_map_page @@ -241,6 +249,9 @@ static void hib_end_io(struct bio *bio) if (bio_data_dir(bio) == WRITE) put_page(page); + else if (clean_pages_on_read) + flush_icache_range((unsigned long)page_address(page), + (unsigned long)page_address(page) + PAGE_SIZE); if (bio->bi_error && !hb->error) hb->error = bio->bi_error; @@ -1049,6 +1060,7 @@ static int load_image(struct swap_map_handle *handle, hib_init_batch(&hb); + clean_pages_on_read = true; printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", nr_to_read); m = nr_to_read / 10; @@ -1124,6 +1136,10 @@ static int lzo_decompress_threadfn(void *data) d->unc_len = LZO_UNC_SIZE; d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, d->unc, &d->unc_len); + if (clean_pages_on_decompress) + flush_icache_range((unsigned long)d->unc, + (unsigned long)d->unc + d->unc_len); + atomic_set(&d->stop, 1); wake_up(&d->done); } @@ -1189,6 +1205,8 @@ static int load_image_lzo(struct swap_map_handle *handle, } memset(crc, 0, offsetof(struct crc_data, go)); + clean_pages_on_decompress = true; + /* * Start the decompression threads. */ -- cgit v1.2.3 From d200cc995188b88832350fae508f79211a284ff7 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Thu, 9 Jun 2016 23:43:28 -0700 Subject: cpu/hotplug: Handle unbalanced hotplug enable/disable [ Upstream commit 01b41159066531cc8d664362ff0cd89dd137bbfa ] When cpu_hotplug_enable() is called unbalanced w/o a preceeding cpu_hotplug_disable() the code emits a warning, but happily decrements the disabled counter. This causes the next operations to malfunction. Prevent the decrement and just emit a warning. Signed-off-by: Lianwei Wang Cc: peterz@infradead.org Cc: linux-pm@vger.kernel.org Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/1465541008-12476-1-git-send-email-lianwei.wang@gmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- kernel/cpu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 40d20bf5de28..42ce0b0ae5c5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -183,10 +183,17 @@ void cpu_hotplug_disable(void) } EXPORT_SYMBOL_GPL(cpu_hotplug_disable); +static void __cpu_hotplug_enable(void) +{ + if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) + return; + cpu_hotplug_disabled--; +} + void cpu_hotplug_enable(void) { cpu_maps_update_begin(); - WARN_ON(--cpu_hotplug_disabled < 0); + __cpu_hotplug_enable(); cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_enable); @@ -626,7 +633,7 @@ void enable_nonboot_cpus(void) /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); - WARN_ON(--cpu_hotplug_disabled < 0); + __cpu_hotplug_enable(); if (cpumask_empty(frozen_cpus)) goto out; -- cgit v1.2.3