From 10dbe22927586c60c6344a37e8fc6bbbc7e3ada5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 22:03:18 +0200 Subject: trace: Fix preempt_enable_no_resched() abuse commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream. Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net Cc: Waiman Long Cc: Linus Torvalds Cc: Ingo Molnar Cc: Will Deacon Cc: Thomas Gleixner Cc: the arch/x86 maintainers Cc: Davidlohr Bueso Cc: Tim Chen Cc: huang ying Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: stable@vger.kernel.org Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5e091614fe39..1cf2402c6922 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } -- cgit v1.2.3 From 2f14dae91560e36ada26e4019f40ddf1798d7b70 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Sat, 20 Apr 2019 16:34:16 +0800 Subject: sched/numa: Fix a possible divide-by-zero commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream. sched_clock_cpu() may not be consistent between CPUs. If a task migrates to another CPU, then se.exec_start is set to that CPU's rq_clock_task() by update_stats_curr_start(). Specifically, the new value might be before the old value due to clock skew. So then if in numa_get_avg_runtime() the expression: 'now - p->last_task_numa_placement' ends up as -1, then the divider '*period + 1' in task_numa_placement() is 0 and things go bang. Similar to update_curr(), check if time goes backwards to avoid this. [ peterz: Wrote new changelog. ] [ mingo: Tweaked the code comment. ] Signed-off-by: Xie XiuQi Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cj.chengjian@huawei.com Cc: Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d706cf4fda99..75bfa23f97b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1722,6 +1722,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) if (p->last_task_numa_placement) { delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; + + /* Avoid time going backwards, prevent potential divide error: */ + if (unlikely((s64)*period < 0)) + *period = 0; } else { delta = p->se.avg.load_sum / p->se.load.weight; *period = LOAD_AVG_MAX; -- cgit v1.2.3 From 1aec586c25ee8be91afb07cbaed74cf94b827b79 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Sun, 24 Mar 2019 07:57:04 -0700 Subject: genirq: Prevent use-after-free and work list corruption [ Upstream commit 59c39840f5abf4a71e1810a8da71aaccd6c17d26 ] When irq_set_affinity_notifier() replaces the notifier, then the reference count on the old notifier is dropped which causes it to be freed. But nothing ensures that the old notifier is not longer queued in the work list. If it is queued this results in a use after free and possibly in work list corruption. Ensure that the work is canceled before the reference is dropped. Signed-off-by: Prasad Sodagudi Signed-off-by: Thomas Gleixner Cc: marc.zyngier@arm.com Link: https://lkml.kernel.org/r/1553439424-6529-1-git-send-email-psodagud@codeaurora.org Signed-off-by: Sasha Levin --- kernel/irq/manage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 83cea913983c..92c7eb1aeded 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -319,8 +319,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) desc->affinity_notify = notify; raw_spin_unlock_irqrestore(&desc->lock, flags); - if (old_notify) + if (old_notify) { + cancel_work_sync(&old_notify->work); kref_put(&old_notify->kref, old_notify->release); + } return 0; } -- cgit v1.2.3 From a9bdfbd494daa400b46f364498a9d35f932840fd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 7 May 2019 20:04:04 +0100 Subject: timer/debug: Change /proc/timer_stats from 0644 to 0600 The timer_stats facility should filter and translate PIDs if opened from a non-initial PID namespace, to avoid leaking information about the wider system. It should also not show kernel virtual addresses. Unfortunately it has now been removed upstream (as redundant) instead of being fixed. For stable, fix the leak by restricting access to root only. A similar change was already made for the /proc/timer_list file. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 1adecb4b87c8..7e4d715f9c22 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -417,7 +417,7 @@ static int __init init_tstats_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); + pe = proc_create("timer_stats", 0600, NULL, &tstats_fops); if (!pe) return -ENOMEM; return 0; -- cgit v1.2.3 From 4a215a1155ce2be31832ed1f5075cb57bb2818ec Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:18 +0200 Subject: x86/speculation: Apply IBPB more strictly to avoid cross-process data leak commit dbfe2953f63c640463c630746cd5d9de8b2f63ae upstream. Currently, IBPB is only issued in cases when switching into a non-dumpable process, the rationale being to protect such 'important and security sensitive' processess (such as GPG) from data leaking into a different userspace process via spectre v2. This is however completely insufficient to provide proper userspace-to-userpace spectrev2 protection, as any process can poison branch buffers before being scheduled out, and the newly scheduled process immediately becomes spectrev2 victim. In order to minimize the performance impact (for usecases that do require spectrev2 protection), issue the barrier only in cases when switching between processess where the victim can't be ptraced by the potential attacker (as in such cases, the attacker doesn't have to bother with branch buffers at all). [ tglx: Split up PTRACE_MODE_NOACCESS_CHK into PTRACE_MODE_SCHED and PTRACE_MODE_IBPB to be able to do ptrace() context tracking reasonably fine-grained ] Fixes: 18bf3c3ea8 ("x86/speculation: Use Indirect Branch Prediction Barrier in context switch") Originally-by: Tim Chen Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251437340.15880@cbobk.fhfr.pm Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5e2cd1030702..8303874c2a06 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -228,6 +228,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { + if (mode & PTRACE_MODE_SCHED) + return false; + if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -295,9 +298,16 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; + if (mode & PTRACE_MODE_SCHED) + return 0; return security_ptrace_access_check(task, mode); } +bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) +{ + return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); +} + bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; -- cgit v1.2.3 From 1f562beba75c9da65461debbdb5d8d2871ab38a1 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 May 2019 00:46:25 +0100 Subject: sched: Add sched_smt_active() Add the sched_smt_active() function needed for some x86 speculation mitigations. This was introduced upstream by commits 1b568f0aabf2 "sched/core: Optimize SCHED_SMT", ba2591a5993e "sched/smt: Update sched_smt_present at runtime", c5511d03ec09 "sched/smt: Make sched_smt_present track topology", and 321a874a7ef8 "sched/smt: Expose sched_smt_present static key". The upstream implementation uses the static_key_{disable,enable}_cpuslocked() functions, which aren't practical to backport. Signed-off-by: Ben Hutchings Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 24 ++++++++++++++++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 25 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d0618951014b..d35a7d528ea6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5610,6 +5610,10 @@ static void set_cpu_rq_start_time(void) rq->age_stamp = sched_clock_cpu(cpu); } +#ifdef CONFIG_SCHED_SMT +atomic_t sched_smt_present = ATOMIC_INIT(0); +#endif + static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -5626,11 +5630,23 @@ static int sched_cpu_active(struct notifier_block *nfb, * set_cpu_online(). But it might not yet have marked itself * as active, which is essential from here on. */ +#ifdef CONFIG_SCHED_SMT + /* + * When going up, increment the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + atomic_inc(&sched_smt_present); +#endif set_cpu_active(cpu, true); stop_machine_unpark(cpu); return NOTIFY_OK; case CPU_DOWN_FAILED: +#ifdef CONFIG_SCHED_SMT + /* Same as for CPU_ONLINE */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + atomic_inc(&sched_smt_present); +#endif set_cpu_active(cpu, true); return NOTIFY_OK; @@ -5645,7 +5661,15 @@ static int sched_cpu_inactive(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: set_cpu_active((long)hcpu, false); +#ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask((long)hcpu)) == 2) + atomic_dec(&sched_smt_present); +#endif return NOTIFY_OK; + default: return NOTIFY_DONE; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6893ee31df4d..8b96df04ba78 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 892d9881b437fb51581c410d90899a396e85f0cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: x86/speculation: Rework SMT state change commit a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f upstream. arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de [bwh: Backported to 4.4: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 42ce0b0ae5c5..c2bfeed74be3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -199,6 +200,12 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { } + /* Need to know about CPUs going up/down? */ int register_cpu_notifier(struct notifier_block *nb) { @@ -434,6 +441,7 @@ out_release: cpu_hotplug_done(); if (!err) cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); + arch_smt_update(); return err; } @@ -537,7 +545,7 @@ out_notify: __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); out: cpu_hotplug_done(); - + arch_smt_update(); return ret; } -- cgit v1.2.3 From 31a2c5f7a25b1cf4739ccd0244b0b270c42dab89 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:28 -0500 Subject: cpu/speculation: Add 'mitigations=' cmdline option commit 98af8452945c55652de68536afdde3b520fec429 upstream. Keeping track of the number of mitigations for all the CPU speculation bugs has become overwhelming for many users. It's getting more and more complicated to decide which mitigations are needed for a given architecture. Complicating matters is the fact that each arch tends to have its own custom way to mitigate the same vulnerability. Most users fall into a few basic categories: a) they want all mitigations off; b) they want all reasonable mitigations on, with SMT enabled even if it's vulnerable; or c) they want all reasonable mitigations on, with SMT disabled if vulnerable. Define a set of curated, arch-independent options, each of which is an aggregation of existing options: - mitigations=off: Disable all mitigations. - mitigations=auto: [default] Enable all the default mitigations, but leave SMT enabled, even if it's vulnerable. - mitigations=auto,nosmt: Enable all the default mitigations, disabling SMT if needed by a mitigation. Currently, these options are placeholders which don't actually do anything. They will be fleshed out in upcoming patches. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com [bwh: Backported to 4.4: - Drop the auto,nosmt option which we can't support - Adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index c2bfeed74be3..3225c3a9d028 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -842,3 +842,16 @@ void init_cpu_online(const struct cpumask *src) { cpumask_copy(to_cpumask(cpu_online_bits), src); } + +enum cpu_mitigations cpu_mitigations = CPU_MITIGATIONS_AUTO; + +static int __init mitigations_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + cpu_mitigations = CPU_MITIGATIONS_OFF; + else if (!strcmp(arg, "auto")) + cpu_mitigations = CPU_MITIGATIONS_AUTO; + + return 0; +} +early_param("mitigations", mitigations_parse_cmdline); -- cgit v1.2.3