Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditfilter.c          | 12
-rw-r--r--  kernel/cpu.c                  | 23
-rw-r--r--  kernel/cpuset.c               | 18
-rw-r--r--  kernel/cred.c                 |  9
-rw-r--r--  kernel/events/ring_buffer.c   | 33
-rw-r--r--  kernel/futex.c                |  4
-rw-r--r--  kernel/irq/manage.c           |  5
-rw-r--r--  kernel/ptrace.c               | 30
-rw-r--r--  kernel/rcu/rcutorture.c       |  5
-rw-r--r--  kernel/sched/core.c           | 33
-rw-r--r--  kernel/sched/fair.c           |  4
-rw-r--r--  kernel/sched/sched.h          |  1
-rw-r--r--  kernel/signal.c               |  2
-rw-r--r--  kernel/sys.c                  |  2
-rw-r--r--  kernel/sysctl.c               |  6
-rw-r--r--  kernel/time/ntp.c             |  2
-rw-r--r--  kernel/trace/ring_buffer.c    |  2
-rw-r--r--  kernel/trace/trace_events.c   |  3
18 files changed, 162 insertions(+), 32 deletions(-)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b57f929f1b46..cf7aa656b308 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1095,22 +1095,24 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,
 	int err = 0;
 	struct audit_entry *entry;
 
-	entry = audit_data_to_entry(data, datasz);
-	if (IS_ERR(entry))
-		return PTR_ERR(entry);
-
 	switch (type) {
 	case AUDIT_ADD_RULE:
+		entry = audit_data_to_entry(data, datasz);
+		if (IS_ERR(entry))
+			return PTR_ERR(entry);
 		err = audit_add_rule(entry);
 		audit_log_rule_change("add_rule", &entry->rule, !err);
 		break;
 	case AUDIT_DEL_RULE:
+		entry = audit_data_to_entry(data, datasz);
+		if (IS_ERR(entry))
+			return PTR_ERR(entry);
 		err = audit_del_rule(entry);
 		audit_log_rule_change("remove_rule", &entry->rule, !err);
 		break;
 	default:
-		err = -EINVAL;
 		WARN_ON(1);
+		return -EINVAL;
 	}
 
 	if (err || type == AUDIT_DEL_RULE) {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b4440d57f89..d484f2869861 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/notifier.h>
 #include <linux/sched.h>
+#include <linux/sched/smt.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
 #include <linux/oom.h>
@@ -206,6 +207,12 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */
 
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
 /* Need to know about CPUs going up/down? */
 int register_cpu_notifier(struct notifier_block *nb)
 {
@@ -445,6 +452,7 @@ out_release:
 	trace_sched_cpu_hotplug(cpu, err, 0);
 	if (!err)
 		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
+	arch_smt_update();
 	return err;
 }
 
@@ -549,7 +557,7 @@ out_notify:
 out:
 	cpu_hotplug_done();
 	trace_sched_cpu_hotplug(cpu, ret, 1);
-
+	arch_smt_update();
 	return ret;
 }
 
@@ -911,6 +919,19 @@ void init_cpu_isolated(const struct cpumask *src)
 	cpumask_copy(to_cpumask(cpu_isolated_bits), src);
 }
 
+enum cpu_mitigations cpu_mitigations = CPU_MITIGATIONS_AUTO;
+
+static int __init mitigations_parse_cmdline(char *arg)
+{
+	if (!strcmp(arg, "off"))
+		cpu_mitigations = CPU_MITIGATIONS_OFF;
+	else if (!strcmp(arg, "auto"))
+		cpu_mitigations = CPU_MITIGATIONS_AUTO;
+
+	return 0;
+}
+early_param("mitigations", mitigations_parse_cmdline);
+
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
 void idle_notifier_register(struct notifier_block *n)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a599351997ad..2ce1a5297b92 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -419,14 +419,19 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
 
 	if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL))
 		goto free_cs;
+	if (!alloc_cpumask_var(&trial->cpus_requested, GFP_KERNEL))
+		goto free_allowed;
 	if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
 		goto free_cpus;
 
 	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+	cpumask_copy(trial->cpus_requested, cs->cpus_requested);
 	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
 	return trial;
 
 free_cpus:
+	free_cpumask_var(trial->cpus_requested);
+free_allowed:
 	free_cpumask_var(trial->cpus_allowed);
 free_cs:
 	kfree(trial);
@@ -440,6 +445,7 @@ free_cs:
 static void free_trial_cpuset(struct cpuset *trial)
 {
 	free_cpumask_var(trial->effective_cpus);
+	free_cpumask_var(trial->cpus_requested);
 	free_cpumask_var(trial->cpus_allowed);
 	kfree(trial);
 }
@@ -948,23 +954,23 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		return -EACCES;
 
 	/*
-	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
+	 * An empty cpus_requested is ok only if the cpuset has no tasks.
 	 * Since cpulist_parse() fails on an empty mask, we special case
 	 * that parsing.  The validate_change() call ensures that cpusets
 	 * with tasks have cpus.
 	 */
 	if (!*buf) {
-		cpumask_clear(trialcs->cpus_allowed);
+		cpumask_clear(trialcs->cpus_requested);
 	} else {
 		retval = cpulist_parse(buf, trialcs->cpus_requested);
 		if (retval < 0)
 			return retval;
+	}
 
-		if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
-			return -EINVAL;
+	if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
+		return -EINVAL;
 
-		cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
-	}
+	cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
 
 	/* Nothing to do if the cpus didn't change */
 	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
diff --git a/kernel/cred.c b/kernel/cred.c
index ff8606f77d90..098af0bc0b7e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -447,6 +447,15 @@ int commit_creds(struct cred *new)
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;
+		/*
+		 * If a task drops privileges and becomes nondumpable,
+		 * the dumpability change must become visible before
+		 * the credential change; otherwise, a __ptrace_may_access()
+		 * racing with this change may be able to attach to a task it
+		 * shouldn't be able to attach to (as if the task had dropped
+		 * privileges without becoming nondumpable).
+		 * Pairs with a read barrier in __ptrace_may_access().
+		 */
 		smp_wmb();
 	}
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 424f5a5fa5a2..b128cc829bd8 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -49,14 +49,30 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 	unsigned long head;
 
 again:
+	/*
+	 * In order to avoid publishing a head value that goes backwards,
+	 * we must ensure the load of @rb->head happens after we've
+	 * incremented @rb->nest.
+	 *
+	 * Otherwise we can observe a @rb->head value before one published
+	 * by an IRQ/NMI happening between the load and the increment.
+	 */
+	barrier();
 	head = local_read(&rb->head);
 
 	/*
-	 * IRQ/NMI can happen here, which means we can miss a head update.
+	 * IRQ/NMI can happen here and advance @rb->head, causing our
+	 * load above to be stale.
 	 */
 
-	if (!local_dec_and_test(&rb->nest))
+	/*
+	 * If this isn't the outermost nesting, we don't have to update
+	 * @rb->user_page->data_head.
+	 */
+	if (local_read(&rb->nest) > 1) {
+		local_dec(&rb->nest);
 		goto out;
+	}
 
 	/*
 	 * Since the mmap() consumer (userspace) can run on a different CPU:
@@ -88,9 +104,18 @@ again:
 	rb->user_page->data_head = head;
 
 	/*
-	 * Now check if we missed an update -- rely on previous implied
-	 * compiler barriers to force a re-read.
+	 * We must publish the head before decrementing the nest count,
+	 * otherwise an IRQ/NMI can publish a more recent head value and our
+	 * write will (temporarily) publish a stale value.
+	 */
+	barrier();
+	local_set(&rb->nest, 0);
+
+	/*
+	 * Ensure we decrement @rb->nest before we validate the @rb->head.
+	 * Otherwise we cannot be sure we caught the 'last' nested update.
 	 */
+	barrier();
 	if (unlikely(head != local_read(&rb->head))) {
 		local_inc(&rb->nest);
 		goto again;
diff --git a/kernel/futex.c b/kernel/futex.c
index 39c2b3eda1b9..d24d164e9bdb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -593,8 +593,8 @@ again:
		 * applies. If this is really a shmem page then the page lock
		 * will prevent unexpected transitions.
		 */
-		lock_page(page);
-		shmem_swizzled = PageSwapCache(page) || page->mapping;
+		lock_page(page_head);
+		shmem_swizzled = PageSwapCache(page_head) || page_head->mapping;
		unlock_page(page_head);
		put_page(page_head);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 36a13b75b48f..d0193c0b2531 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -334,11 +334,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 	desc->affinity_notify = notify;
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
-	if (!notify && old_notify)
+	if (old_notify) {
 		cancel_work_sync(&old_notify->work);
-
-	if (old_notify)
 		kref_put(&old_notify->kref, old_notify->release);
+	}
 
 	return 0;
 }
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5e2cd1030702..1aa33fe37aa8 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -228,6 +228,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
+	if (mode & PTRACE_MODE_SCHED)
+		return false;
+
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -289,15 +292,32 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return -EPERM;
 ok:
 	rcu_read_unlock();
+	/*
+	 * If a task drops privileges and becomes nondumpable (through a syscall
+	 * like setresuid()) while we are trying to access it, we must ensure
+	 * that the dumpability is read after the credentials; otherwise,
+	 * we may be able to attach to a task that we shouldn't be able to
+	 * attach to (as if the task had dropped privileges without becoming
+	 * nondumpable).
+	 * Pairs with a write barrier in commit_creds().
+	 */
+	smp_rmb();
 	mm = task->mm;
 	if (mm &&
 	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
 	     !ptrace_has_cap(mm->user_ns, mode)))
 		return -EPERM;
+	if (mode & PTRACE_MODE_SCHED)
+		return 0;
 
 	return security_ptrace_access_check(task, mode);
 }
 
+bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
+{
+	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
+}
+
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;
@@ -663,6 +683,10 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 	if (arg.nr < 0)
 		return -EINVAL;
 
+	/* Ensure arg.off fits in an unsigned long */
+	if (arg.off > ULONG_MAX)
+		return 0;
+
 	if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
 		pending = &child->signal->shared_pending;
 	else
@@ -670,18 +694,20 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 
 	for (i = 0; i < arg.nr; ) {
 		siginfo_t info;
-		s32 off = arg.off + i;
+		unsigned long off = arg.off + i;
+		bool found = false;
 
 		spin_lock_irq(&child->sighand->siglock);
 		list_for_each_entry(q, &pending->list, list) {
 			if (!off--) {
+				found = true;
 				copy_siginfo(&info, &q->info);
 				break;
 			}
 		}
 		spin_unlock_irq(&child->sighand->siglock);
 
-		if (off >= 0) /* beyond the end of the list */
+		if (!found) /* beyond the end of the list */
 			break;
 
 #ifdef CONFIG_COMPAT
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d89328e260df..041a02b334d7 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1603,6 +1603,10 @@ rcu_torture_cleanup(void)
 			cur_ops->cb_barrier();
 		return;
 	}
+	if (!cur_ops) {
+		torture_cleanup_end();
+		return;
+	}
 
 	rcu_torture_barrier_cleanup();
 	torture_stop_kthread(rcu_torture_stall, stall_task);
@@ -1741,6 +1745,7 @@ rcu_torture_init(void)
 			pr_alert(" %s", torture_ops[i]->name);
 		pr_alert("\n");
 		firsterr = -EINVAL;
+		cur_ops = NULL;
 		goto unwind;
 	}
 	if (cur_ops->fqs == NULL && fqs_duration != 0) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6b9021b9bc26..cccb3564410b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6373,6 +6373,10 @@ static void set_cpu_rq_start_time(void)
 	rq->age_stamp = sched_clock_cpu(cpu);
 }
 
+#ifdef CONFIG_SCHED_SMT
+atomic_t sched_smt_present = ATOMIC_INIT(0);
+#endif
+
 static int sched_cpu_active(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
@@ -6389,11 +6393,23 @@ static int sched_cpu_active(struct notifier_block *nfb,
 		 * set_cpu_online(). But it might not yet have marked itself
 		 * as active, which is essential from here on.
 		 */
+#ifdef CONFIG_SCHED_SMT
+		/*
+		 * When going up, increment the number of cores with SMT present.
+		 */
+		if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+			atomic_inc(&sched_smt_present);
+#endif
 		set_cpu_active(cpu, true);
 		stop_machine_unpark(cpu);
 		return NOTIFY_OK;
 
 	case CPU_DOWN_FAILED:
+#ifdef CONFIG_SCHED_SMT
+		/* Same as for CPU_ONLINE */
+		if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+			atomic_inc(&sched_smt_present);
+#endif
 		set_cpu_active(cpu, true);
 		return NOTIFY_OK;
 
@@ -6408,7 +6424,15 @@ static int sched_cpu_inactive(struct notifier_block *nfb,
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
 		set_cpu_active((long)hcpu, false);
+#ifdef CONFIG_SCHED_SMT
+		/*
+		 * When going down, decrement the number of cores with SMT present.
+		 */
+		if (cpumask_weight(cpu_smt_mask((long)hcpu)) == 2)
+			atomic_dec(&sched_smt_present);
+#endif
 		return NOTIFY_OK;
+
 	default:
 		return NOTIFY_DONE;
 	}
@@ -9356,6 +9380,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
 				struct cftype *cftype, u64 shareval)
 {
+	if (shareval > scale_load_down(ULONG_MAX))
+		shareval = MAX_SHARES;
 	return sched_group_set_shares(css_tg(css), scale_load(shareval));
 }
 
@@ -9455,8 +9481,10 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 	period = ktime_to_ns(tg->cfs_bandwidth.period);
 	if (cfs_quota_us < 0)
 		quota = RUNTIME_INF;
-	else
+	else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
 		quota = (u64)cfs_quota_us * NSEC_PER_USEC;
+	else
+		return -EINVAL;
 
 	return tg_set_cfs_bandwidth(tg, period, quota);
 }
@@ -9478,6 +9506,9 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
 {
 	u64 quota, period;
 
+	if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
+		return -EINVAL;
+
 	period = (u64)cfs_period_us * NSEC_PER_USEC;
 	quota = tg->cfs_bandwidth.quota;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c759c6a3ea20..1eac0642fa86 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1901,6 +1901,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
 	if (p->last_task_numa_placement) {
 		delta = runtime - p->last_sum_exec_runtime;
 		*period = now - p->last_task_numa_placement;
+
+		/* Avoid time going backwards, prevent potential divide error: */
+		if (unlikely((s64)*period < 0))
+			*period = 0;
 	} else {
 		delta = p->se.avg.load_sum / p->se.load.weight;
 		*period = LOAD_AVG_MAX;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8cb8b1c555c8..12883703a021 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2,6 +2,7 @@
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/rt.h>
+#include <linux/sched/smt.h>
 #include <linux/sched/deadline.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 96e8c3cbfa38..072fd152ab01 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2244,6 +2244,8 @@ relock:
 	if (signal_group_exit(signal)) {
 		ksig->info.si_signo = signr = SIGKILL;
 		sigdelset(&current->pending.signal, SIGKILL);
+		trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
+				&sighand->action[SIGKILL - 1]);
 		recalc_sigpending();
 		goto fatal;
 	}
diff --git a/kernel/sys.c b/kernel/sys.c
index ede0c1f4b860..29413e2bee41 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1764,7 +1764,7 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map)
	((unsigned long)prctl_map->__m1 __op				\
	 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
 	error  = __prctl_check_order(start_code, <, end_code);
-	error |= __prctl_check_order(start_data, <, end_data);
+	error |= __prctl_check_order(start_data, <=, end_data);
 	error |= __prctl_check_order(start_brk, <=, brk);
 	error |= __prctl_check_order(arg_start, <=, arg_end);
 	error |= __prctl_check_order(env_start, <=, env_end);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 43a049f9264c..b25717eb3d3a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2782,8 +2782,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 			if (neg)
 				continue;
 			val = convmul * val / convdiv;
-			if ((min && val < *min) || (max && val > *max))
-				continue;
+			if ((min && val < *min) || (max && val > *max)) {
+				err = -EINVAL;
+				break;
+			}
 			*i = val;
 		} else {
 			val = convdiv * (*i) / convmul;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index ab861771e37f..0e0dc5d89911 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,7 +633,7 @@ static inline void process_adjtimex_modes(struct timex *txc,
 		time_constant = max(time_constant, 0l);
 	}
 
-	if (txc->modes & ADJ_TAI && txc->constant > 0)
+	if (txc->modes & ADJ_TAI && txc->constant >= 0)
 		*time_tai = txc->constant;
 
 	if (txc->modes & ADJ_OFFSET)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5e091614fe39..1cf2402c6922 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 
 	preempt_disable_notrace();
 	time = rb_time_stamp(buffer);
-	preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
 
 	return time;
 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1235f9fd9fbd..d0451cfd9daa 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1289,9 +1289,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 	char buf[32];
 	int len;
 
-	if (*ppos)
-		return 0;
-
 	if (unlikely(!id))
 		return -ENODEV;
 
