 kernel/exit.c        |   5
 kernel/sched/core.c  |  12
 kernel/sched/fair.c  |  11
 kernel/sched/sched.h |   3
 kernel/sched/tune.c  | 178
 kernel/sched/tune.h  |   6
 6 files changed, 193 insertions(+), 22 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index ffba5df4abd5..62c4bd4abd3a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -54,6 +54,8 @@
#include <linux/writeback.h>
#include <linux/shm.h>
+#include "sched/tune.h"
+
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/pgtable.h>
@@ -699,6 +701,9 @@ void do_exit(long code)
}
exit_signals(tsk); /* sets PF_EXITING */
+
+ schedtune_exit_task(tsk);
+
/*
* tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07f389a0f22f..b814c13f850f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -287,6 +287,18 @@ int sysctl_sched_rt_runtime = 950000;
/* cpus with isolated domains */
cpumask_var_t cpu_isolated_map;
+struct rq *
+lock_rq_of(struct task_struct *p, unsigned long *flags)
+{
+ return task_rq_lock(p, flags);
+}
+
+void
+unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags)
+{
+ task_rq_unlock(rq, p, flags);
+}
+
/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
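
The two wrappers added above simply forward to task_rq_lock()/task_rq_unlock(); they exist so that code outside the core scheduler (tune.c, later in this patch) can pin a task to its runqueue without open-coding the locking internals. A minimal sketch of the intended call pattern, assuming the wrappers behave exactly as defined above (the inspect_task() helper is illustrative, not part of the patch):

	/* Sketch only: pin p to its runqueue while inspecting/accounting it. */
	static void inspect_task(struct task_struct *p)
	{
		unsigned long irq_flags;
		struct rq *rq;

		rq = lock_rq_of(p, &irq_flags);	/* p->pi_lock + rq->lock, IRQs off */
		if (p->on_rq) {
			/* p cannot migrate away from cpu_of(rq) while we hold the lock */
		}
		unlock_rq_of(rq, p, &irq_flags);
	}
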
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9c717c3be75d..736adab1a503 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4250,8 +4250,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cpu_overutilized(rq->cpu))
rq->rd->overutilized = true;
- schedtune_enqueue_task(p, cpu_of(rq));
-
/*
* We want to potentially trigger a freq switch
* request only for tasks that are waking up; this is
@@ -4262,6 +4260,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (task_new || task_wakeup)
update_capacity_of(cpu_of(rq));
}
+
+ /* Update SchedTune accounting */
+ schedtune_enqueue_task(p, cpu_of(rq));
+
#endif /* CONFIG_SMP */
hrtick_update(rq);
@@ -4327,7 +4329,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP
if (!se) {
- schedtune_dequeue_task(p, cpu_of(rq));
/*
* We want to potentially trigger a freq switch
@@ -4345,6 +4346,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
}
+ /* Update SchedTune accounting */
+ schedtune_dequeue_task(p, cpu_of(rq));
+
#endif /* CONFIG_SMP */
hrtick_update(rq);
@@ -5625,7 +5629,6 @@ static inline int find_best_target(struct task_struct *p)
* The target CPU can be already at a capacity level higher
* than the one required to boost the task.
*/
-
if (new_util > capacity_orig_of(i))
continue;
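
The net effect of the fair.c hunks is to move the SchedTune accounting calls out of the `if (!se)` capacity-update branches so that they run unconditionally, at the end of the CONFIG_SMP section of both the enqueue and dequeue paths. A simplified ordering sketch of the resulting enqueue path (step comments only, not the literal kernel code):

	/* Simplified ordering sketch of enqueue_task_fair() after this patch. */
	static void enqueue_task_fair_sketch(struct rq *rq, struct task_struct *p, int flags)
	{
		/* 1. enqueue p on its cfs_rq hierarchy */
	#ifdef CONFIG_SMP
		/* 2. overutilization check and capacity/frequency update,
		 *    only when the whole hierarchy was enqueued (!se)
		 */
		/* 3. SchedTune accounting, now unconditional and last: */
		schedtune_enqueue_task(p, cpu_of(rq));
	#endif
		/* 4. hrtick_update(rq) */
	}
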
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5cd947923e11..1b838cff2f20 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1701,6 +1701,9 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
+extern struct rq *lock_rq_of(struct task_struct *p, unsigned long *flags);
+extern void unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags);
+
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index a691b8db2888..4c77cc23e65b 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -11,6 +11,10 @@
#include "sched.h"
#include "tune.h"
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+static bool schedtune_initialized = false;
+#endif
+
unsigned int sysctl_sched_cfs_boost __read_mostly;
extern struct target_nrg schedtune_target_nrg;
@@ -222,6 +226,8 @@ struct boost_groups {
/* Count of RUNNABLE tasks on that boost group */
unsigned tasks;
} group[BOOSTGROUPS_COUNT];
+ /* CPU's boost group locking */
+ raw_spinlock_t lock;
};
/* Boost groups affecting each CPU in the system */
@@ -298,28 +304,24 @@ schedtune_boostgroup_update(int idx, int boost)
return 0;
}
+#define ENQUEUE_TASK 1
+#define DEQUEUE_TASK -1
+
static inline void
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
{
- struct boost_groups *bg;
- int tasks;
-
- bg = &per_cpu(cpu_boost_groups, cpu);
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ int tasks = bg->group[idx].tasks + task_count;
/* Update the count of boosted tasks, without letting it go negative */
- if (task_count < 0 && bg->group[idx].tasks <= -task_count)
- bg->group[idx].tasks = 0;
- else
- bg->group[idx].tasks += task_count;
-
- /* Boost group activation or deactivation on that RQ */
- tasks = bg->group[idx].tasks;
- if (tasks == 1 || tasks == 0)
- schedtune_cpu_update(cpu);
+ bg->group[idx].tasks = max(0, tasks);
trace_sched_tune_tasks_update(p, cpu, tasks, idx,
bg->group[idx].boost, bg->boost_max);
+ /* Boost group activation or deactivation on that RQ */
+ if (tasks == 1 || tasks == 0)
+ schedtune_cpu_update(cpu);
}
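
The rewritten schedtune_tasks_update() clamps the per-group RUNNABLE counter at zero and calls schedtune_cpu_update() whenever the (unclamped) result is 0 or 1, i.e. whenever the group may have just been emptied or just become active. A small, self-contained userspace model of that accounting (the struct and function names are illustrative, not the kernel's):

	#include <stdio.h>

	#define ENQUEUE_TASK	 1
	#define DEQUEUE_TASK	-1

	struct group_model { int tasks; };

	/* Model of the clamp-and-notify logic in schedtune_tasks_update(). */
	static void tasks_update(struct group_model *g, int task_count)
	{
		int tasks = g->tasks + task_count;

		g->tasks = tasks > 0 ? tasks : 0;	/* never goes negative */

		/* Re-evaluate the CPU boost when the result is 0 or 1 */
		if (tasks == 1 || tasks == 0)
			printf("boost group re-evaluated, tasks=%d\n", g->tasks);
	}

	int main(void)
	{
		struct group_model g = { 0 };

		tasks_update(&g, ENQUEUE_TASK);	/* 0 -> 1: re-evaluate            */
		tasks_update(&g, ENQUEUE_TASK);	/* 1 -> 2: no action              */
		tasks_update(&g, DEQUEUE_TASK);	/* 2 -> 1: re-evaluate            */
		tasks_update(&g, DEQUEUE_TASK);	/* 1 -> 0: re-evaluate            */
		tasks_update(&g, DEQUEUE_TASK);	/* spurious dequeue: clamped at 0,
						 * no re-evaluation               */
		return 0;
	}
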
/*
@@ -327,9 +329,14 @@ schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
*/
void schedtune_enqueue_task(struct task_struct *p, int cpu)
{
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ unsigned long irq_flags;
struct schedtune *st;
int idx;
+ if (unlikely(!schedtune_initialized))
+ return;
+
/*
* When a task is marked PF_EXITING by do_exit() it's going to be
* dequeued and enqueued multiple times in the exit path.
@@ -339,13 +346,109 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu)
if (p->flags & PF_EXITING)
return;
- /* Get task boost group */
+ /*
+ * Boost group accounting is protected by a per-cpu lock and requires
+ * interrupts to be disabled to avoid race conditions, for example with
+ * do_exit()::cgroup_exit() and task migration.
+ */
+ raw_spin_lock_irqsave(&bg->lock, irq_flags);
rcu_read_lock();
+
st = task_schedtune(p);
idx = st->idx;
+
+ schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);
+
rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
- schedtune_tasks_update(p, cpu, idx, 1);
+int schedtune_allow_attach(struct cgroup_taskset *tset)
+{
+ /* We always allow tasks to be moved between existing CGroups */
+ return 0;
+}
+
+int schedtune_can_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+ struct boost_groups *bg;
+ unsigned long irq_flags;
+ unsigned int cpu;
+ struct rq *rq;
+ int src_bg; /* Source boost group index */
+ int dst_bg; /* Destination boost group index */
+ int tasks;
+
+ if (unlikely(!schedtune_initialized))
+ return 0;
+
+ cgroup_taskset_for_each(task, css, tset) {
+
+ /*
+ * Lock the RQ of the CPU the task is enqueued on, to avoid
+ * races with the migration code while the task is being
+ * accounted.
+ */
+ rq = lock_rq_of(task, &irq_flags);
+
+ if (!task->on_rq) {
+ unlock_rq_of(rq, task, &irq_flags);
+ continue;
+ }
+
+ /*
+ * Boost group accounting is protected by a per-cpu lock and requires
+ * interrupts to be disabled to avoid race conditions on...
+ */
+ cpu = cpu_of(rq);
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ raw_spin_lock(&bg->lock);
+
+ dst_bg = css_st(css)->idx;
+ src_bg = task_schedtune(task)->idx;
+
+ /*
+ * Current task is not changing boostgroup, which can
+ * happen when the new hierarchy is in use.
+ */
+ if (unlikely(dst_bg == src_bg)) {
+ raw_spin_unlock(&bg->lock);
+ unlock_rq_of(rq, task, &irq_flags);
+ continue;
+ }
+
+ /*
+ * This is the case of a RUNNABLE task which is switching its
+ * current boost group.
+ */
+
+ /* Move task from src to dst boost group */
+ tasks = bg->group[src_bg].tasks - 1;
+ bg->group[src_bg].tasks = max(0, tasks);
+ bg->group[dst_bg].tasks += 1;
+
+ raw_spin_unlock(&bg->lock);
+ unlock_rq_of(rq, task, &irq_flags);
+
+ /* Update CPU boost group */
+ if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1)
+ schedtune_cpu_update(task_cpu(task));
+
+ }
+
+ return 0;
+}
+
+void schedtune_cancel_attach(struct cgroup_taskset *tset)
+{
+ /*
+ * This can happen only if the SchedTune controller is mounted along
+ * with other hierarchies and one of them fails. Since SchedTune is
+ * usually mounted on its own hierarchy, for the time being we do not
+ * implement a proper rollback mechanism.
+ */
+ WARN(1, "SchedTune cancel attach not implemented");
}
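
schedtune_can_attach() above accounts a RUNNABLE task that is migrating between boost groups on the same CPU: the source group's counter is decremented (clamped at zero), the destination's is incremented, and the CPU boost is re-evaluated only if the source group was just emptied or the destination just became active. A compact userspace model of that move step (illustrative names, not the kernel code):

	#include <stdio.h>

	struct group_model { int tasks; };

	/* Model of the move step in schedtune_can_attach(): a RUNNABLE task
	 * leaves the source boost group and joins the destination one. */
	static void move_task(struct group_model *src, struct group_model *dst)
	{
		int tasks = src->tasks - 1;

		src->tasks = tasks > 0 ? tasks : 0;	/* clamp, as in the patch */
		dst->tasks += 1;

		/* CPU boost is re-evaluated only if the move could change it */
		if (src->tasks == 0 || dst->tasks == 1)
			printf("schedtune_cpu_update() would run here\n");
	}

	int main(void)
	{
		struct group_model a = { 1 }, b = { 0 };
		struct group_model c = { 3 }, d = { 2 };

		move_task(&a, &b);	/* a emptied, b activated: re-evaluate */
		move_task(&c, &d);	/* both groups stay populated: no-op   */
		return 0;
	}
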
/*
@@ -353,26 +456,62 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu)
*/
void schedtune_dequeue_task(struct task_struct *p, int cpu)
{
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ unsigned long irq_flags;
struct schedtune *st;
int idx;
+ if (unlikely(!schedtune_initialized))
+ return;
+
/*
* When a task is marked PF_EXITING by do_exit() it's going to be
* dequeued and enqueued multiple times in the exit path.
* Thus we avoid any further update, since we do not want to change
* CPU boosting while the task is exiting.
- * The last dequeue will be done by cgroup exit() callback.
+ * The last dequeue is already performed by the do_exit() code path
+ * via schedtune_exit_task().
*/
if (p->flags & PF_EXITING)
return;
- /* Get task boost group */
+ /*
+ * Boost group accounting is protected by a per-cpu lock and requires
+ * interrupts to be disabled to avoid race conditions on...
+ */
+ raw_spin_lock_irqsave(&bg->lock, irq_flags);
rcu_read_lock();
+
st = task_schedtune(p);
idx = st->idx;
+
+ schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);
+
rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+void schedtune_exit_task(struct task_struct *tsk)
+{
+ struct schedtune *st;
+ unsigned long irq_flags;
+ unsigned int cpu;
+ struct rq *rq;
+ int idx;
+
+ if (unlikely(!schedtune_initialized))
+ return;
- schedtune_tasks_update(p, cpu, idx, -1);
+ rq = lock_rq_of(tsk, &irq_flags);
+ rcu_read_lock();
+
+ cpu = cpu_of(rq);
+ st = task_schedtune(tsk);
+ idx = st->idx;
+ schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK);
+
+ rcu_read_unlock();
+ unlock_rq_of(rq, tsk, &irq_flags);
}
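
Together with the exit.c hunk at the top of this patch, schedtune_exit_task() performs the final SchedTune dequeue for a dying task: once exit_signals() has set PF_EXITING, the regular enqueue/dequeue hooks bail out early, and do_exit() accounts the task exactly once itself. A tiny userspace model of that hand-off (names and the single global counter are illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	struct task_model { bool exiting; };
	static int runnable_in_group;

	/* Regular hooks: skip accounting once the task is marked as exiting. */
	static void enqueue_hook(struct task_model *t)
	{
		if (t->exiting)
			return;
		runnable_in_group++;
	}

	static void dequeue_hook(struct task_model *t)
	{
		if (t->exiting)
			return;
		runnable_in_group--;
	}

	/* Exit path: single, explicit final dequeue (schedtune_exit_task()). */
	static void exit_task(struct task_model *t)
	{
		t->exiting = true;	/* exit_signals() sets PF_EXITING */
		runnable_in_group--;	/* one final accounting pass      */
	}

	int main(void)
	{
		struct task_model t = { false };

		enqueue_hook(&t);	/* task becomes RUNNABLE: count = 1 */
		exit_task(&t);		/* do_exit(): count back to 0       */
		dequeue_hook(&t);	/* later dequeues are ignored       */
		enqueue_hook(&t);	/* as are enqueues on the exit path */
		printf("runnable_in_group = %d\n", runnable_in_group);	/* 0 */
		return 0;
	}
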
int schedtune_cpu_boost(int cpu)
@@ -518,6 +657,9 @@ schedtune_css_free(struct cgroup_subsys_state *css)
struct cgroup_subsys schedtune_cgrp_subsys = {
.css_alloc = schedtune_css_alloc,
.css_free = schedtune_css_free,
+// .allow_attach = schedtune_allow_attach,
+ .can_attach = schedtune_can_attach,
+ .cancel_attach = schedtune_cancel_attach,
.legacy_cftypes = files,
.early_init = 1,
};
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
index 99637758a8af..be1785eb1c5b 100644
--- a/kernel/sched/tune.h
+++ b/kernel/sched/tune.h
@@ -17,6 +17,8 @@ struct target_nrg {
int schedtune_cpu_boost(int cpu);
int schedtune_task_boost(struct task_struct *tsk);
+void schedtune_exit_task(struct task_struct *tsk);
+
void schedtune_enqueue_task(struct task_struct *p, int cpu);
void schedtune_dequeue_task(struct task_struct *p, int cpu);
@@ -25,6 +27,8 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu);
#define schedtune_cpu_boost(cpu) get_sysctl_sched_cfs_boost()
#define schedtune_task_boost(tsk) get_sysctl_sched_cfs_boost()
+#define schedtune_exit_task(task) do { } while (0)
+
#define schedtune_enqueue_task(task, cpu) do { } while (0)
#define schedtune_dequeue_task(task, cpu) do { } while (0)
@@ -39,6 +43,8 @@ int schedtune_accept_deltas(int nrg_delta, int cap_delta,
#define schedtune_cpu_boost(cpu) 0
#define schedtune_task_boost(tsk) 0
+#define schedtune_exit_task(task) do { } while (0)
+
#define schedtune_enqueue_task(task, cpu) do { } while (0)
#define schedtune_dequeue_task(task, cpu) do { } while (0)
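
As with the other SchedTune hooks, the new schedtune_exit_task() stub expands to an empty do { } while (0), so callers such as do_exit() need no #ifdef guard: when SchedTune support is compiled out, the call reduces to a single empty statement. A standalone illustration of why the do { } while (0) form is used (userspace, not kernel code):

	#include <stdio.h>

	/* Stub form used when SchedTune is compiled out (mirrors tune.h). */
	#define schedtune_exit_task(task)	do { } while (0)

	int main(void)
	{
		int tsk = 0;			/* stand-in for the task pointer */

		/* Expands to nothing, but still parses as one statement,
		 * so it is safe inside an un-braced if/else. */
		if (tsk == 0)
			schedtune_exit_task(&tsk);
		else
			printf("never reached\n");

		return 0;
	}
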