Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/inode.c             |  7
-rw-r--r--  kernel/bpf/syscall.c           | 24
-rw-r--r--  kernel/bpf/verifier.c          | 66
-rw-r--r--  kernel/cgroup.c                | 14
-rw-r--r--  kernel/cpuset.c                |  4
-rw-r--r--  kernel/events/ring_buffer.c    | 10
-rw-r--r--  kernel/futex.c                 | 27
-rw-r--r--  kernel/locking/mcs_spinlock.h  |  8
-rw-r--r--  kernel/sched/core.c            | 35
-rw-r--r--  kernel/trace/trace_events.c    |  9
-rw-r--r--  kernel/workqueue.c             | 40
11 files changed, 69 insertions(+), 175 deletions(-)
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index d1a7646f79c5..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
{
switch (type) {
case BPF_TYPE_PROG:
- raw = bpf_prog_inc(raw);
+ atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
break;
case BPF_TYPE_MAP:
- raw = bpf_map_inc(raw, true);
+ bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -277,8 +277,7 @@ static void *bpf_obj_do_get(const struct filename *pathname,
goto out;
raw = bpf_any_get(inode->i_private, *type);
- if (!IS_ERR(raw))
- touch_atime(&path);
+ touch_atime(&path);
path_put(&path);
return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e32cc94edd9..3b39550d8485 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,18 +181,11 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-/* prog's and map's refcnt limit */
-#define BPF_MAX_REFCNT 32768
-
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
+void bpf_map_inc(struct bpf_map *map, bool uref)
{
- if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
- atomic_dec(&map->refcnt);
- return ERR_PTR(-EBUSY);
- }
+ atomic_inc(&map->refcnt);
if (uref)
atomic_inc(&map->usercnt);
- return map;
}
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -204,7 +197,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
- map = bpf_map_inc(map, true);
+ bpf_map_inc(map, true);
fdput(f);
return map;
@@ -587,15 +580,6 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
return f.file->private_data;
}
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
-{
- if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
- atomic_dec(&prog->aux->refcnt);
- return ERR_PTR(-EBUSY);
- }
- return prog;
-}
-
/* called by sockets/tracing/seccomp before attaching program to an event
* pairs with bpf_prog_put()
*/
@@ -608,7 +592,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
if (IS_ERR(prog))
return prog;
- prog = bpf_prog_inc(prog);
+ atomic_inc(&prog->aux->refcnt);
fdput(f);
return prog;
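
The syscall.c hunk above drops the BPF_MAX_REFCNT cap and returns to plain atomic increments. As a reading aid, here is a minimal userspace C11 sketch of the capped-refcount pattern the removed bpf_map_inc()/bpf_prog_inc() helpers implemented; the type and function names are hypothetical, not the kernel's.

/* Hypothetical sketch: refuse a new reference instead of letting the
 * counter grow without bound, as the removed helpers did.
 */
#include <stdatomic.h>
#include <errno.h>

#define REFCNT_MAX 32768

struct object {
	atomic_int refcnt;
};

/* Returns 0 on success, -EBUSY once the limit would be exceeded. */
static int object_get(struct object *obj)
{
	if (atomic_fetch_add(&obj->refcnt, 1) + 1 > REFCNT_MAX) {
		atomic_fetch_sub(&obj->refcnt, 1);
		return -EBUSY;
	}
	return 0;
}

The point of such a cap is to turn a reference leak into a recoverable error rather than a wrapped counter.
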
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2cbfba78d3db..2e7f7ab739e4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,6 +239,15 @@ static const char * const reg_type_str[] = {
[CONST_IMM] = "imm",
};
+static const struct {
+ int map_type;
+ int func_id;
+} func_limit[] = {
+ {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
+ {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+ {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
+};
+
static void print_verifier_state(struct verifier_env *env)
{
enum bpf_reg_type t;
@@ -889,44 +898,24 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
{
+ bool bool_map, bool_func;
+ int i;
+
if (!map)
return 0;
- /* We need a two way check, first is from map perspective ... */
- switch (map->map_type) {
- case BPF_MAP_TYPE_PROG_ARRAY:
- if (func_id != BPF_FUNC_tail_call)
- goto error;
- break;
- case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
- if (func_id != BPF_FUNC_perf_event_read &&
- func_id != BPF_FUNC_perf_event_output)
- goto error;
- break;
- default:
- break;
- }
-
- /* ... and second from the function itself. */
- switch (func_id) {
- case BPF_FUNC_tail_call:
- if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
- goto error;
- break;
- case BPF_FUNC_perf_event_read:
- case BPF_FUNC_perf_event_output:
- if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
- goto error;
- break;
- default:
- break;
+ for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
+ bool_map = (map->map_type == func_limit[i].map_type);
+ bool_func = (func_id == func_limit[i].func_id);
+ /* only when map & func pair match it can continue.
+ * don't allow any other map type to be passed into
+ * the special func;
+ */
+ if (bool_func && bool_map != bool_func)
+ return -EINVAL;
}
return 0;
-error:
- verbose("cannot pass map_type %d into func %d\n",
- map->map_type, func_id);
- return -EINVAL;
}
static int check_call(struct verifier_env *env, int func_id)
@@ -1359,7 +1348,6 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
- BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
verbose("BPF_LD_ABS uses reserved fields\n");
return -EINVAL;
@@ -2015,6 +2003,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
if (IS_ERR(map)) {
verbose("fd %d is not pointing to valid bpf_map\n",
insn->imm);
+ fdput(f);
return PTR_ERR(map);
}
@@ -2034,18 +2023,15 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
return -E2BIG;
}
+ /* remember this map */
+ env->used_maps[env->used_map_cnt++] = map;
+
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded
* and all maps are released in free_bpf_prog_info()
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map)) {
- fdput(f);
- return PTR_ERR(map);
- }
- env->used_maps[env->used_map_cnt++] = map;
-
+ bpf_map_inc(map, false);
fdput(f);
next_insn:
insn++;
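
The verifier.c hunk replaces the two-way switch with a table-driven check: a helper listed in func_limit[] may only be used with its paired map type. Below is a compact standalone sketch of that shape; the enum values and names are made up for illustration.

/* Hypothetical sketch of a table-driven helper/map compatibility check. */
#include <errno.h>
#include <stddef.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum map_type { MAP_PROG_ARRAY, MAP_PERF_EVENT_ARRAY, MAP_HASH };
enum func_id  { FUNC_tail_call, FUNC_perf_event_read,
		FUNC_perf_event_output, FUNC_map_lookup };

static const struct {
	enum map_type map_type;
	enum func_id  func_id;
} func_limit[] = {
	{ MAP_PROG_ARRAY,       FUNC_tail_call },
	{ MAP_PERF_EVENT_ARRAY, FUNC_perf_event_read },
	{ MAP_PERF_EVENT_ARRAY, FUNC_perf_event_output },
};

static int check_pair(enum map_type map_type, enum func_id func_id)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
		/* a restricted helper must be paired with its map type */
		if (func_id == func_limit[i].func_id &&
		    map_type != func_limit[i].map_type)
			return -EINVAL;
	}
	return 0;
}

One subtlety visible in the loop: the check runs only from the helper's side (a restricted helper demands its map type); the removed switch additionally checked from the map's side.
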
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9d03abef6676..e8d71110ed2a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2776,10 +2776,9 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
- struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
- int ssid, ret;
+ int ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
@@ -2827,10 +2826,8 @@ out_unlock_rcu:
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
- for_each_subsys(ss, ssid)
- if (ss->post_attach)
- ss->post_attach();
cgroup_kn_unlock(of->kn);
+ cpuset_post_attach_flush();
return ret ?: nbytes;
}
@@ -4747,15 +4744,14 @@ static void css_free_work_fn(struct work_struct *work)
if (ss) {
/* css free path */
- struct cgroup_subsys_state *parent = css->parent;
int id = css->id;
+ if (css->parent)
+ css_put(css->parent);
+
ss->css_free(css);
cgroup_idr_remove(&ss->css_idr, id);
cgroup_put(cgrp);
-
- if (parent)
- css_put(parent);
} else {
/* cgroup free path */
atomic_dec(&cgrp->root->nr_cgrps);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a65d63463420..2df78d45a096 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -57,6 +57,7 @@
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
+#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/wait.h>
@@ -1014,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
}
}
-static void cpuset_post_attach(void)
+void cpuset_post_attach_flush(void)
{
flush_workqueue(cpuset_migrate_mm_wq);
}
@@ -2100,7 +2101,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.allow_attach = cpuset_allow_attach,
.cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach,
- .post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.legacy_cftypes = files,
.early_init = 1,
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 014b69528194..adfdc0536117 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -347,7 +347,6 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
bool truncated)
{
struct ring_buffer *rb = handle->rb;
- bool wakeup = truncated;
unsigned long aux_head;
u64 flags = 0;
@@ -376,16 +375,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
- wakeup = true;
- local_add(rb->aux_watermark, &rb->aux_wakeup);
- }
-
- if (wakeup) {
- if (truncated)
- handle->event->pending_disable = 1;
perf_output_wakeup(handle);
+ local_add(rb->aux_watermark, &rb->aux_wakeup);
}
-
handle->event = NULL;
local_set(&rb->aux_nest, 0);
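
The ring_buffer.c hunk reduces perf_aux_output_end() to a plain watermark test: wake the consumer only once at least aux_watermark bytes have been produced since the last wakeup. A minimal single-threaded sketch of that logic follows; the struct and field names are hypothetical.

/* Hypothetical sketch of watermark-paced consumer wakeups. */
struct aux_buf {
	unsigned long head;		/* bytes produced so far */
	unsigned long last_wakeup;	/* head value at the last wakeup */
	unsigned long watermark;	/* wake the consumer every this many bytes */
};

static void aux_advance(struct aux_buf *b, unsigned long size,
			void (*wake)(void))
{
	b->head += size;
	if (b->head - b->last_wakeup >= b->watermark) {
		wake();
		b->last_wakeup += b->watermark;
	}
}

Note that last_wakeup advances by exactly one watermark per wakeup, mirroring local_add(rb->aux_watermark, &rb->aux_wakeup) in the hunk.
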
diff --git a/kernel/futex.c b/kernel/futex.c
index 9d8163afd87c..461c72b2dac2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1244,20 +1244,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (unlikely(should_fail_futex(true)))
ret = -EFAULT;
- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
ret = -EFAULT;
- } else if (curval != uval) {
- /*
- * If a unconditional UNLOCK_PI operation (user space did not
- * try the TID->0 transition) raced with a waiter setting the
- * FUTEX_WAITERS flag between get_user() and locking the hash
- * bucket lock, retry the operation.
- */
- if ((FUTEX_TID_MASK & curval) == uval)
- ret = -EAGAIN;
- else
- ret = -EINVAL;
- }
+ else if (curval != uval)
+ ret = -EINVAL;
if (ret) {
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
return ret;
@@ -1484,8 +1474,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
if (likely(&hb1->chain != &hb2->chain)) {
plist_del(&q->list, &hb1->chain);
hb_waiters_dec(hb1);
- hb_waiters_inc(hb2);
plist_add(&q->list, &hb2->chain);
+ hb_waiters_inc(hb2);
q->lock_ptr = &hb2->lock;
}
get_futex_key_refs(key2);
@@ -2548,15 +2538,6 @@ retry:
if (ret == -EFAULT)
goto pi_faulted;
/*
- * A unconditional UNLOCK_PI op raced against a waiter
- * setting the FUTEX_WAITERS bit. Try again.
- */
- if (ret == -EAGAIN) {
- spin_unlock(&hb->lock);
- put_futex_key(&key);
- goto retry;
- }
- /*
* wake_futex_pi has detected invalid state. Tell user
* space.
*/
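
The first futex.c hunk drops the branch that told a benign race (only the waiter flag bits changed, so retry) apart from genuinely unexpected owner state. For context, here is a hedged userspace sketch of that compare-and-swap-and-inspect pattern; the names and mask value are illustrative only.

/* Hypothetical sketch: CAS the ownership word and inspect the value found
 * on failure to decide between retrying and reporting corruption.
 */
#include <stdatomic.h>
#include <errno.h>
#include <stdint.h>

#define TID_MASK 0x3fffffffu	/* illustrative: low bits carry the owner TID */

static int try_handoff(_Atomic uint32_t *uaddr, uint32_t expected, uint32_t newval)
{
	uint32_t curval = expected;

	if (atomic_compare_exchange_strong(uaddr, &curval, newval))
		return 0;		/* handoff succeeded */
	if ((curval & TID_MASK) == expected)
		return -EAGAIN;		/* only flag bits changed: retry */
	return -EINVAL;			/* unexpected owner: hard error */
}
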
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index c835270f0c2f..5b9102a47ea5 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,13 +67,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
- /*
- * We rely on the full barrier with global transitivity implied by the
- * below xchg() to order the initialization stores above against any
- * observation of @node. And to provide the ACQUIRE ordering associated
- * with a LOCK primitive.
- */
- prev = xchg(lock, node);
+ prev = xchg_acquire(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
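
The mcs_spinlock.h hunk relaxes the tail swap from a full-barrier xchg() to xchg_acquire() and drops the comment explaining why the full barrier was there. For readers unfamiliar with MCS locks, here is a self-contained C11 sketch of the queue structure being manipulated; it is an illustration only, not the kernel's implementation.

/* Hypothetical MCS queue-lock sketch: each waiter spins on its own node. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	struct mcs_node *_Atomic next;
	atomic_bool locked;
};

/* Enqueue ourselves on the tail; spin until the previous holder hands over. */
static void mcs_lock(struct mcs_node *_Atomic *tail, struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store_explicit(&node->locked, false, memory_order_relaxed);
	atomic_store_explicit(&node->next, NULL, memory_order_relaxed);

	/* seq_cst exchange publishes our node and orders the stores above */
	prev = atomic_exchange(tail, node);
	if (prev == NULL)
		return;		/* queue was empty: lock acquired immediately */

	atomic_store_explicit(&prev->next, node, memory_order_release);
	while (!atomic_load_explicit(&node->locked, memory_order_acquire))
		;		/* spin on our own node */
}

/* Hand the lock to the next waiter, or reset the tail if there is none. */
static void mcs_unlock(struct mcs_node *_Atomic *tail, struct mcs_node *node)
{
	struct mcs_node *next =
		atomic_load_explicit(&node->next, memory_order_acquire);

	if (next == NULL) {
		struct mcs_node *expected = node;

		if (atomic_compare_exchange_strong(tail, &expected, NULL))
			return;	/* no successor: queue is empty again */
		/* a successor is mid-enqueue; wait for it to link itself */
		while ((next = atomic_load_explicit(&node->next,
						    memory_order_acquire)) == NULL)
			;
	}
	atomic_store_explicit(&next->locked, true, memory_order_release);
}

The ordering question the removed comment discussed — whether the tail exchange needs full ordering or only acquire semantics — is exactly what xchg() versus xchg_acquire() encodes; the sketch sidesteps it by using the default seq_cst exchange.
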
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 25afcb8a1402..db0472b37feb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11223,7 +11223,7 @@ void set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
-static void sched_free_group(struct task_group *tg)
+static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
free_rt_sched_group(tg);
@@ -11249,7 +11249,7 @@ struct task_group *sched_create_group(struct task_group *parent)
return tg;
err:
- sched_free_group(tg);
+ free_sched_group(tg);
return ERR_PTR(-ENOMEM);
}
@@ -11269,16 +11269,17 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
}
/* rcu callback to free various structures associated with a task group */
-static void sched_free_group_rcu(struct rcu_head *rhp)
+static void free_sched_group_rcu(struct rcu_head *rhp)
{
/* now it should be safe to free those cfs_rqs */
- sched_free_group(container_of(rhp, struct task_group, rcu));
+ free_sched_group(container_of(rhp, struct task_group, rcu));
}
+/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg)
{
/* wait for possible concurrent references to cfs_rqs complete */
- call_rcu(&tg->rcu, sched_free_group_rcu);
+ call_rcu(&tg->rcu, free_sched_group_rcu);
}
void sched_offline_group(struct task_group *tg)
@@ -11739,26 +11740,31 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
- sched_online_group(tg, parent);
-
return &tg->css;
}
-static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
{
struct task_group *tg = css_tg(css);
+ struct task_group *parent = css_tg(css->parent);
- sched_offline_group(tg);
+ if (parent)
+ sched_online_group(tg, parent);
+ return 0;
}
static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct task_group *tg = css_tg(css);
- /*
- * Relies on the RCU grace period between css_released() and this.
- */
- sched_free_group(tg);
+ sched_destroy_group(tg);
+}
+
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
+{
+ struct task_group *tg = css_tg(css);
+
+ sched_offline_group(tg);
}
static void cpu_cgroup_fork(struct task_struct *task, void *private)
@@ -12187,8 +12193,9 @@ static struct cftype cpu_files[] = {
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
- .css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free,
+ .css_online = cpu_cgroup_css_online,
+ .css_offline = cpu_cgroup_css_offline,
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 26960e49bb8c..fda3b6e1b3a0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2108,13 +2108,8 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
trace_create_file("filter", 0644, file->dir, file,
&ftrace_event_filter_fops);
- /*
- * Only event directories that can be enabled should have
- * triggers.
- */
- if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
- trace_create_file("trigger", 0644, file->dir, file,
- &event_trigger_fops);
+ trace_create_file("trigger", 0644, file->dir, file,
+ &event_trigger_fops);
trace_create_file("format", 0444, file->dir, call,
&ftrace_event_format_fops);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 316b316c7528..ef84d9874d03 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -652,35 +652,6 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
*/
smp_wmb();
set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
- /*
- * The following mb guarantees that previous clear of a PENDING bit
- * will not be reordered with any speculative LOADS or STORES from
- * work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
- * the same @work. E.g. consider this case:
- *
- * CPU#0 CPU#1
- * ---------------------------- --------------------------------
- *
- * 1 STORE event_indicated
- * 2 queue_work_on() {
- * 3 test_and_set_bit(PENDING)
- * 4 } set_..._and_clear_pending() {
- * 5 set_work_data() # clear bit
- * 6 smp_mb()
- * 7 work->current_func() {
- * 8 LOAD event_indicated
- * }
- *
- * Without an explicit full barrier speculative LOAD on line 8 can
- * be executed before CPU#0 does STORE on line 1. If that happens,
- * CPU#0 observes the PENDING bit is still set and new execution of
- * a @work is not queued in a hope, that CPU#1 will eventually
- * finish the queued @work. Meanwhile CPU#1 does not see
- * event_indicated is set, because speculative LOAD was executed
- * before actual STORE.
- */
- smp_mb();
}
static void clear_work_data(struct work_struct *work)
@@ -4476,17 +4447,6 @@ static void rebind_workers(struct worker_pool *pool)
pool->attrs->cpumask) < 0);
spin_lock_irq(&pool->lock);
-
- /*
- * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
- * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is
- * being reworked and this can go away in time.
- */
- if (!(pool->flags & POOL_DISASSOCIATED)) {
- spin_unlock_irq(&pool->lock);
- return;
- }
-
pool->flags &= ~POOL_DISASSOCIATED;
for_each_pool_worker(worker, pool) {