summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c4
-rw-r--r--kernel/bpf/syscall.c13
-rw-r--r--kernel/events/core.c5
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/fair.c59
-rw-r--r--kernel/sched/idle.c2
-rw-r--r--kernel/sched/sched.h4
-rw-r--r--kernel/trace/trace.c19
8 files changed, 71 insertions, 39 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 0eb11b4ac4c7..daa4e0782cf7 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -270,9 +270,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
static void prog_fd_array_put_ptr(void *ptr)
{
- struct bpf_prog *prog = ptr;
-
- bpf_prog_put_rcu(prog);
+ bpf_prog_put(ptr);
}
/* decrement refcnt of all bpf_progs that are stored in this map */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4b9bbfe764e8..04fc1022ad9f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -487,7 +487,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
free_uid(user);
}
-static void __prog_put_common(struct rcu_head *rcu)
+static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
@@ -496,17 +496,10 @@ static void __prog_put_common(struct rcu_head *rcu)
bpf_prog_free(aux->prog);
}
-/* version of bpf_prog_put() that is called after a grace period */
-void bpf_prog_put_rcu(struct bpf_prog *prog)
-{
- if (atomic_dec_and_test(&prog->aux->refcnt))
- call_rcu(&prog->aux->rcu, __prog_put_common);
-}
-
void bpf_prog_put(struct bpf_prog *prog)
{
if (atomic_dec_and_test(&prog->aux->refcnt))
- __prog_put_common(&prog->aux->rcu);
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
@@ -514,7 +507,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
{
struct bpf_prog *prog = filp->private_data;
- bpf_prog_put_rcu(prog);
+ bpf_prog_put(prog);
return 0;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bfed031c5beb..0f9ada220789 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7023,6 +7023,8 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
goto unlock;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (event->cpu != smp_processor_id())
+ continue;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
continue;
if (event->attr.config != entry->type)
@@ -7144,7 +7146,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
prog = event->tp_event->prog;
if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
- bpf_prog_put_rcu(prog);
+ bpf_prog_put(prog);
}
}
@@ -8538,6 +8540,7 @@ SYSCALL_DEFINE5(perf_event_open,
f_flags);
if (IS_ERR(event_file)) {
err = PTR_ERR(event_file);
+ event_file = NULL;
goto err_context;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c1b195a2aaf..d6a90c8bfedc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8144,11 +8144,9 @@ void sched_destroy_group(struct task_group *tg)
void sched_offline_group(struct task_group *tg)
{
unsigned long flags;
- int i;
/* end participation in shares distribution */
- for_each_possible_cpu(i)
- unregister_fair_sched_group(tg, i);
+ unregister_fair_sched_group(tg);
spin_lock_irqsave(&task_group_lock, flags);
list_del_rcu(&tg->list);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ccf212fcd5bf..54cf946d1a4c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4104,9 +4104,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
/*
* Add to the _head_ of the list, so that an already-started
- * distribute_cfs_runtime will not see us
+ * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is
+ * not running add to the tail so that later runqueues don't get starved.
*/
- list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+ if (cfs_b->distribute_running)
+ list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+ else
+ list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
/*
* If we're the first throttled task, make sure the bandwidth
@@ -4249,14 +4253,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
* in us over-using our runtime if it is all used during this loop, but
* only by limited amounts in that extreme case.
*/
- while (throttled && cfs_b->runtime > 0) {
+ while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
runtime = cfs_b->runtime;
+ cfs_b->distribute_running = 1;
raw_spin_unlock(&cfs_b->lock);
/* we can't nest cfs_b->lock while distributing bandwidth */
runtime = distribute_cfs_runtime(cfs_b, runtime,
runtime_expires);
raw_spin_lock(&cfs_b->lock);
+ cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
cfs_b->runtime -= min(runtime, cfs_b->runtime);
@@ -4367,6 +4373,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/* confirm we're still not at a refresh boundary */
raw_spin_lock(&cfs_b->lock);
+ if (cfs_b->distribute_running) {
+ raw_spin_unlock(&cfs_b->lock);
+ return;
+ }
+
if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
raw_spin_unlock(&cfs_b->lock);
return;
@@ -4376,6 +4387,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
runtime = cfs_b->runtime;
expires = cfs_b->runtime_expires;
+ if (runtime)
+ cfs_b->distribute_running = 1;
+
raw_spin_unlock(&cfs_b->lock);
if (!runtime)
@@ -4386,6 +4400,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
raw_spin_lock(&cfs_b->lock);
if (expires == cfs_b->runtime_expires)
cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -4497,6 +4512,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period_timer.function = sched_cfs_period_timer;
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
+ cfs_b->distribute_running = 0;
}
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -10265,11 +10281,8 @@ void free_fair_sched_group(struct task_group *tg)
for_each_possible_cpu(i) {
if (tg->cfs_rq)
kfree(tg->cfs_rq[i]);
- if (tg->se) {
- if (tg->se[i])
- remove_entity_load_avg(tg->se[i]);
+ if (tg->se)
kfree(tg->se[i]);
- }
}
kfree(tg->cfs_rq);
@@ -10324,21 +10337,29 @@ err:
return 0;
}
-void unregister_fair_sched_group(struct task_group *tg, int cpu)
+void unregister_fair_sched_group(struct task_group *tg)
{
- struct rq *rq = cpu_rq(cpu);
unsigned long flags;
+ struct rq *rq;
+ int cpu;
- /*
- * Only empty task groups can be destroyed; so we can speculatively
- * check on_list without danger of it being re-added.
- */
- if (!tg->cfs_rq[cpu]->on_list)
- return;
+ for_each_possible_cpu(cpu) {
+ if (tg->se[cpu])
+ remove_entity_load_avg(tg->se[cpu]);
- raw_spin_lock_irqsave(&rq->lock, flags);
- list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ /*
+ * Only empty task groups can be destroyed; so we can speculatively
+ * check on_list without danger of it being re-added.
+ */
+ if (!tg->cfs_rq[cpu]->on_list)
+ continue;
+
+ rq = cpu_rq(cpu);
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
}
void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
@@ -10422,7 +10443,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
return 1;
}
-void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
+void unregister_fair_sched_group(struct task_group *tg) { }
#endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 917c94abf5bb..cc0975f94a8d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -133,7 +133,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
static void cpuidle_idle_call(void)
{
- struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+ struct cpuidle_device *dev = cpuidle_get_device();
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int next_state, entered_state;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d9ad549591d8..7f18a3243af5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -235,6 +235,8 @@ struct cfs_bandwidth {
/* statistics */
int nr_periods, nr_throttled;
u64 throttled_time;
+
+ bool distribute_running;
#endif
};
@@ -310,7 +312,7 @@ extern int tg_nop(struct task_group *tg, void *data);
extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
-extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void unregister_fair_sched_group(struct task_group *tg);
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
struct sched_entity *se, int cpu,
struct sched_entity *parent);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3a3dd833fcf..eb1ac8faac16 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1778,7 +1778,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
{
__buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+ /*
+ * If regs is not set, then skip the following callers:
+ * trace_buffer_unlock_commit_regs
+ * event_trigger_unlock_commit
+ * trace_event_buffer_commit
+ * trace_event_raw_event_sched_switch
+ * Note, we can still get here via blktrace, wakeup tracer
+ * and mmiotrace, but that's ok if they lose a function or
+ * two. They are that meaningful.
+ */
+ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
@@ -1837,6 +1847,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
trace.skip = skip;
/*
+ * Add two, for this function and the call to save_stack_trace()
+ * If regs is set, then these functions will not be in the way.
+ */
+ if (!regs)
+ trace.skip += 2;
+
+ /*
* Since events can happen in NMIs there's no safe way to
* use the per cpu ftrace_stacks. We reserve it and if an interrupt
* or NMI comes in, it will just have to use the default