Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditfilter.c         | 12
-rw-r--r--  kernel/cpu.c                 | 23
-rw-r--r--  kernel/cpuset.c              | 18
-rw-r--r--  kernel/cred.c                |  9
-rw-r--r--  kernel/events/ring_buffer.c  | 33
-rw-r--r--  kernel/futex.c               |  4
-rw-r--r--  kernel/irq/manage.c          |  5
-rw-r--r--  kernel/ptrace.c              | 30
-rw-r--r--  kernel/rcu/rcutorture.c      |  5
-rw-r--r--  kernel/sched/core.c          | 33
-rw-r--r--  kernel/sched/fair.c          |  4
-rw-r--r--  kernel/sched/sched.h         |  1
-rw-r--r--  kernel/signal.c              |  2
-rw-r--r--  kernel/sys.c                 |  2
-rw-r--r--  kernel/sysctl.c              |  6
-rw-r--r--  kernel/time/ntp.c            |  2
-rw-r--r--  kernel/trace/ring_buffer.c   |  2
-rw-r--r--  kernel/trace/trace_events.c  |  3
18 files changed, 162 insertions(+), 32 deletions(-)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b57f929f1b46..cf7aa656b308 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1095,22 +1095,24 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,
int err = 0;
struct audit_entry *entry;
- entry = audit_data_to_entry(data, datasz);
- if (IS_ERR(entry))
- return PTR_ERR(entry);
-
switch (type) {
case AUDIT_ADD_RULE:
+ entry = audit_data_to_entry(data, datasz);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
err = audit_add_rule(entry);
audit_log_rule_change("add_rule", &entry->rule, !err);
break;
case AUDIT_DEL_RULE:
+ entry = audit_data_to_entry(data, datasz);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
err = audit_del_rule(entry);
audit_log_rule_change("remove_rule", &entry->rule, !err);
break;
default:
- err = -EINVAL;
WARN_ON(1);
+ return -EINVAL;
}
if (err || type == AUDIT_DEL_RULE) {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b4440d57f89..d484f2869861 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
@@ -206,6 +207,12 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
/* Need to know about CPUs going up/down? */
int register_cpu_notifier(struct notifier_block *nb)
{
@@ -445,6 +452,7 @@ out_release:
trace_sched_cpu_hotplug(cpu, err, 0);
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
+ arch_smt_update();
return err;
}
@@ -549,7 +557,7 @@ out_notify:
out:
cpu_hotplug_done();
trace_sched_cpu_hotplug(cpu, ret, 1);
-
+ arch_smt_update();
return ret;
}
@@ -911,6 +919,19 @@ void init_cpu_isolated(const struct cpumask *src)
cpumask_copy(to_cpumask(cpu_isolated_bits), src);
}
+enum cpu_mitigations cpu_mitigations = CPU_MITIGATIONS_AUTO;
+
+static int __init mitigations_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ cpu_mitigations = CPU_MITIGATIONS_OFF;
+ else if (!strcmp(arg, "auto"))
+ cpu_mitigations = CPU_MITIGATIONS_AUTO;
+
+ return 0;
+}
+early_param("mitigations", mitigations_parse_cmdline);
+
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
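Note on the cpu.c hunks above: arch_smt_update() is defined __weak so the generic hotplug code can call it unconditionally; an architecture that needs to re-evaluate SMT-dependent mitigations on CPU online/offline supplies a strong definition, which the linker prefers over the weak no-op. A minimal sketch of such an override (hypothetical arch-side code, not part of this diff; it assumes the cpu_mitigations setting introduced above is made visible to arch code through a header):

	/* arch/<arch>/kernel/bugs.c -- illustrative only */
	#include <linux/cpu.h>

	void arch_smt_update(void)
	{
		/* honour "mitigations=off" from the command line */
		if (cpu_mitigations == CPU_MITIGATIONS_OFF)
			return;

		/* re-evaluate SMT-dependent mitigation state for this arch */
	}

The "mitigations=" early_param only recognizes "off" and "auto" here; any other value leaves the default (CPU_MITIGATIONS_AUTO) in place.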
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a599351997ad..2ce1a5297b92 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -419,14 +419,19 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL))
goto free_cs;
+ if (!alloc_cpumask_var(&trial->cpus_requested, GFP_KERNEL))
+ goto free_allowed;
if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
goto free_cpus;
cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+ cpumask_copy(trial->cpus_requested, cs->cpus_requested);
cpumask_copy(trial->effective_cpus, cs->effective_cpus);
return trial;
free_cpus:
+ free_cpumask_var(trial->cpus_requested);
+free_allowed:
free_cpumask_var(trial->cpus_allowed);
free_cs:
kfree(trial);
@@ -440,6 +445,7 @@ free_cs:
static void free_trial_cpuset(struct cpuset *trial)
{
free_cpumask_var(trial->effective_cpus);
+ free_cpumask_var(trial->cpus_requested);
free_cpumask_var(trial->cpus_allowed);
kfree(trial);
}
@@ -948,23 +954,23 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
return -EACCES;
/*
- * An empty cpus_allowed is ok only if the cpuset has no tasks.
+ * An empty cpus_requested is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
*/
if (!*buf) {
- cpumask_clear(trialcs->cpus_allowed);
+ cpumask_clear(trialcs->cpus_requested);
} else {
retval = cpulist_parse(buf, trialcs->cpus_requested);
if (retval < 0)
return retval;
+ }
- if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
- return -EINVAL;
+ if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
+ return -EINVAL;
- cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
- }
+ cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
/* Nothing to do if the cpus didn't change */
if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
diff --git a/kernel/cred.c b/kernel/cred.c
index ff8606f77d90..098af0bc0b7e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -447,6 +447,15 @@ int commit_creds(struct cred *new)
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
+ /*
+ * If a task drops privileges and becomes nondumpable,
+ * the dumpability change must become visible before
+ * the credential change; otherwise, a __ptrace_may_access()
+ * racing with this change may be able to attach to a task it
+ * shouldn't be able to attach to (as if the task had dropped
+ * privileges without becoming nondumpable).
+ * Pairs with a read barrier in __ptrace_may_access().
+ */
smp_wmb();
}
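The smp_wmb() documented above pairs with the smp_rmb() added to __ptrace_may_access() later in this diff. The intended ordering, summarized as a sketch (not literal kernel code):

	/*
	 * writer: commit_creds()               reader: __ptrace_may_access()
	 *
	 *   set_dumpable(task->mm, ...);          check task credentials
	 *   smp_wmb();                            smp_rmb();
	 *   install new (dropped) credentials     get_dumpable(task->mm);
	 *
	 * If the reader observes the new credentials, the barriers guarantee
	 * it also observes the task as nondumpable, so it cannot attach on
	 * the strength of a stale dumpable flag.
	 */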
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 424f5a5fa5a2..b128cc829bd8 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -49,14 +49,30 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
unsigned long head;
again:
+ /*
+ * In order to avoid publishing a head value that goes backwards,
+ * we must ensure the load of @rb->head happens after we've
+ * incremented @rb->nest.
+ *
+ * Otherwise we can observe a @rb->head value before one published
+ * by an IRQ/NMI happening between the load and the increment.
+ */
+ barrier();
head = local_read(&rb->head);
/*
- * IRQ/NMI can happen here, which means we can miss a head update.
+ * IRQ/NMI can happen here and advance @rb->head, causing our
+ * load above to be stale.
*/
- if (!local_dec_and_test(&rb->nest))
+ /*
+ * If this isn't the outermost nesting, we don't have to update
+ * @rb->user_page->data_head.
+ */
+ if (local_read(&rb->nest) > 1) {
+ local_dec(&rb->nest);
goto out;
+ }
/*
* Since the mmap() consumer (userspace) can run on a different CPU:
@@ -88,9 +104,18 @@ again:
rb->user_page->data_head = head;
/*
- * Now check if we missed an update -- rely on previous implied
- * compiler barriers to force a re-read.
+ * We must publish the head before decrementing the nest count,
+ * otherwise an IRQ/NMI can publish a more recent head value and our
+ * write will (temporarily) publish a stale value.
+ */
+ barrier();
+ local_set(&rb->nest, 0);
+
+ /*
+ * Ensure we decrement @rb->nest before we validate the @rb->head.
+ * Otherwise we cannot be sure we caught the 'last' nested update.
*/
+ barrier();
if (unlikely(head != local_read(&rb->head))) {
local_inc(&rb->nest);
goto again;
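Taken together, the ring_buffer.c hunks pin down the following program order for the outermost nesting level of perf_output_put_handle() (a condensed restatement of the barriers above, not additional code):

	/*
	 *   (nest was incremented by the matching output-begin path)
	 *   barrier();
	 *   head = local_read(&rb->head);        read head only after nest is raised
	 *   ...
	 *   rb->user_page->data_head = head;     publish to userspace
	 *   barrier();
	 *   local_set(&rb->nest, 0);             drop nesting only after publishing
	 *   barrier();
	 *   if (head != local_read(&rb->head))   re-check: catch nested writers
	 *           goto again;
	 */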
diff --git a/kernel/futex.c b/kernel/futex.c
index 39c2b3eda1b9..d24d164e9bdb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -593,8 +593,8 @@ again:
* applies. If this is really a shmem page then the page lock
* will prevent unexpected transitions.
*/
- lock_page(page);
- shmem_swizzled = PageSwapCache(page) || page->mapping;
+ lock_page(page_head);
+ shmem_swizzled = PageSwapCache(page_head) || page_head->mapping;
unlock_page(page_head);
put_page(page_head);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 36a13b75b48f..d0193c0b2531 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -334,11 +334,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
desc->affinity_notify = notify;
raw_spin_unlock_irqrestore(&desc->lock, flags);
- if (!notify && old_notify)
+ if (old_notify) {
cancel_work_sync(&old_notify->work);
-
- if (old_notify)
kref_put(&old_notify->kref, old_notify->release);
+ }
return 0;
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5e2cd1030702..1aa33fe37aa8 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -228,6 +228,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
{
+ if (mode & PTRACE_MODE_SCHED)
+ return false;
+
if (mode & PTRACE_MODE_NOAUDIT)
return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
else
@@ -289,15 +292,32 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
+ /*
+ * If a task drops privileges and becomes nondumpable (through a syscall
+ * like setresuid()) while we are trying to access it, we must ensure
+ * that the dumpability is read after the credentials; otherwise,
+ * we may be able to attach to a task that we shouldn't be able to
+ * attach to (as if the task had dropped privileges without becoming
+ * nondumpable).
+ * Pairs with a write barrier in commit_creds().
+ */
+ smp_rmb();
mm = task->mm;
if (mm &&
((get_dumpable(mm) != SUID_DUMP_USER) &&
!ptrace_has_cap(mm->user_ns, mode)))
return -EPERM;
+ if (mode & PTRACE_MODE_SCHED)
+ return 0;
return security_ptrace_access_check(task, mode);
}
+bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
+{
+ return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
+}
+
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
{
int err;
@@ -663,6 +683,10 @@ static int ptrace_peek_siginfo(struct task_struct *child,
if (arg.nr < 0)
return -EINVAL;
+ /* Ensure arg.off fits in an unsigned long */
+ if (arg.off > ULONG_MAX)
+ return 0;
+
if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
pending = &child->signal->shared_pending;
else
@@ -670,18 +694,20 @@ static int ptrace_peek_siginfo(struct task_struct *child,
for (i = 0; i < arg.nr; ) {
siginfo_t info;
- s32 off = arg.off + i;
+ unsigned long off = arg.off + i;
+ bool found = false;
spin_lock_irq(&child->sighand->siglock);
list_for_each_entry(q, &pending->list, list) {
if (!off--) {
+ found = true;
copy_siginfo(&info, &q->info);
break;
}
}
spin_unlock_irq(&child->sighand->siglock);
- if (off >= 0) /* beyond the end of the list */
+ if (!found) /* beyond the end of the list */
break;
#ifdef CONFIG_COMPAT
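The ptrace_peek_siginfo() change replaces the signed 32-bit cursor with an unsigned long plus an explicit "found" flag: the old "s32 off = arg.off + i" truncated the user-supplied 64-bit offset, so a huge arg.off could alias a small (or negative) cursor. A condensed, user-space style illustration of that truncation (hypothetical values, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t arg_off = 0x100000000ULL;            /* offset requested by the tracer */
		int32_t  off     = (int32_t)(uint32_t)arg_off; /* old kernel cursor: truncates to 0 */

		printf("requested offset %llu, old cursor would start at %d\n",
		       (unsigned long long)arg_off, (int)off);
		return 0;
	}

With the new code, an arg.off that does not fit in an unsigned long simply yields zero signals, and traversal stops as soon as nothing is found at the requested offset.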
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d89328e260df..041a02b334d7 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1603,6 +1603,10 @@ rcu_torture_cleanup(void)
cur_ops->cb_barrier();
return;
}
+ if (!cur_ops) {
+ torture_cleanup_end();
+ return;
+ }
rcu_torture_barrier_cleanup();
torture_stop_kthread(rcu_torture_stall, stall_task);
@@ -1741,6 +1745,7 @@ rcu_torture_init(void)
pr_alert(" %s", torture_ops[i]->name);
pr_alert("\n");
firsterr = -EINVAL;
+ cur_ops = NULL;
goto unwind;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6b9021b9bc26..cccb3564410b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6373,6 +6373,10 @@ static void set_cpu_rq_start_time(void)
rq->age_stamp = sched_clock_cpu(cpu);
}
+#ifdef CONFIG_SCHED_SMT
+atomic_t sched_smt_present = ATOMIC_INIT(0);
+#endif
+
static int sched_cpu_active(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
@@ -6389,11 +6393,23 @@ static int sched_cpu_active(struct notifier_block *nfb,
* set_cpu_online(). But it might not yet have marked itself
* as active, which is essential from here on.
*/
+#ifdef CONFIG_SCHED_SMT
+ /*
+ * When going up, increment the number of cores with SMT present.
+ */
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+ atomic_inc(&sched_smt_present);
+#endif
set_cpu_active(cpu, true);
stop_machine_unpark(cpu);
return NOTIFY_OK;
case CPU_DOWN_FAILED:
+#ifdef CONFIG_SCHED_SMT
+ /* Same as for CPU_ONLINE */
+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+ atomic_inc(&sched_smt_present);
+#endif
set_cpu_active(cpu, true);
return NOTIFY_OK;
@@ -6408,7 +6424,15 @@ static int sched_cpu_inactive(struct notifier_block *nfb,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
set_cpu_active((long)hcpu, false);
+#ifdef CONFIG_SCHED_SMT
+ /*
+ * When going down, decrement the number of cores with SMT present.
+ */
+ if (cpumask_weight(cpu_smt_mask((long)hcpu)) == 2)
+ atomic_dec(&sched_smt_present);
+#endif
return NOTIFY_OK;
+
default:
return NOTIFY_DONE;
}
@@ -9356,6 +9380,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 shareval)
{
+ if (shareval > scale_load_down(ULONG_MAX))
+ shareval = MAX_SHARES;
return sched_group_set_shares(css_tg(css), scale_load(shareval));
}
@@ -9455,8 +9481,10 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
period = ktime_to_ns(tg->cfs_bandwidth.period);
if (cfs_quota_us < 0)
quota = RUNTIME_INF;
- else
+ else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
quota = (u64)cfs_quota_us * NSEC_PER_USEC;
+ else
+ return -EINVAL;
return tg_set_cfs_bandwidth(tg, period, quota);
}
@@ -9478,6 +9506,9 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
{
u64 quota, period;
+ if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
+ return -EINVAL;
+
period = (u64)cfs_period_us * NSEC_PER_USEC;
quota = tg->cfs_bandwidth.quota;
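The three sched/core.c guards above protect the same pattern: a user-supplied value is scaled up (by scale_load() for shares, by NSEC_PER_USEC for the CFS quota and period), and an unchecked multiplication can wrap. The bound used is the standard pre-multiplication overflow test; a self-contained sketch of the idea (not kernel code):

	#include <stdint.h>

	/* a * b wraps a 64-bit result iff b != 0 and a > UINT64_MAX / b */
	static inline int u64_mul_overflows(uint64_t a, uint64_t b)
	{
		return b != 0 && a > UINT64_MAX / b;
	}

With NSEC_PER_USEC == 1000, anything above U64_MAX / 1000 microseconds (roughly 584 years) is rejected with -EINVAL instead of silently wrapping into a bogus bandwidth setting.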
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c759c6a3ea20..1eac0642fa86 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1901,6 +1901,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
if (p->last_task_numa_placement) {
delta = runtime - p->last_sum_exec_runtime;
*period = now - p->last_task_numa_placement;
+
+ /* Avoid time going backwards, prevent potential divide error: */
+ if (unlikely((s64)*period < 0))
+ *period = 0;
} else {
delta = p->se.avg.load_sum / p->se.load.weight;
*period = LOAD_AVG_MAX;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8cb8b1c555c8..12883703a021 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
+#include <linux/sched/smt.h>
#include <linux/sched/deadline.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 96e8c3cbfa38..072fd152ab01 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2244,6 +2244,8 @@ relock:
if (signal_group_exit(signal)) {
ksig->info.si_signo = signr = SIGKILL;
sigdelset(&current->pending.signal, SIGKILL);
+ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
+ &sighand->action[SIGKILL - 1]);
recalc_sigpending();
goto fatal;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index ede0c1f4b860..29413e2bee41 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1764,7 +1764,7 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map)
((unsigned long)prctl_map->__m1 __op \
(unsigned long)prctl_map->__m2) ? 0 : -EINVAL
error = __prctl_check_order(start_code, <, end_code);
- error |= __prctl_check_order(start_data, <, end_data);
+ error |= __prctl_check_order(start_data,<=, end_data);
error |= __prctl_check_order(start_brk, <=, brk);
error |= __prctl_check_order(arg_start, <=, arg_end);
error |= __prctl_check_order(env_start, <=, env_end);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 43a049f9264c..b25717eb3d3a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2782,8 +2782,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
if (neg)
continue;
val = convmul * val / convdiv;
- if ((min && val < *min) || (max && val > *max))
- continue;
+ if ((min && val < *min) || (max && val > *max)) {
+ err = -EINVAL;
+ break;
+ }
*i = val;
} else {
val = convdiv * (*i) / convmul;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index ab861771e37f..0e0dc5d89911 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,7 +633,7 @@ static inline void process_adjtimex_modes(struct timex *txc,
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant > 0)
+ if (txc->modes & ADJ_TAI && txc->constant >= 0)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5e091614fe39..1cf2402c6922 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -701,7 +701,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
preempt_disable_notrace();
time = rb_time_stamp(buffer);
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return time;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1235f9fd9fbd..d0451cfd9daa 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1289,9 +1289,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
char buf[32];
int len;
- if (*ppos)
- return 0;
-
if (unlikely(!id))
return -ENODEV;