summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/verifier.c 3
-rw-r--r-- kernel/cpuset.c 17
-rw-r--r-- kernel/events/core.c 3
-rw-r--r-- kernel/fork.c 53
-rw-r--r-- kernel/gcov/base.c 6
-rw-r--r-- kernel/gcov/gcc_4_7.c 4
-rw-r--r-- kernel/locking/lockdep.c 11
-rw-r--r-- kernel/locking/locktorture.c 4
-rw-r--r-- kernel/power/process.c 5
-rw-r--r-- kernel/rcu/tree.c 12
-rw-r--r-- kernel/sched/core.c 24
-rw-r--r-- kernel/sched/cpufreq_sched.c 14
-rw-r--r-- kernel/sched/cpufreq_schedutil.c 69
-rw-r--r-- kernel/sched/deadline.c 6
-rw-r--r-- kernel/sched/fair.c 48
-rw-r--r-- kernel/sched/rt.c 4
-rw-r--r-- kernel/sched/sched.h 19
-rw-r--r-- kernel/sched/walt.c 317
-rw-r--r-- kernel/seccomp.c 23
-rw-r--r-- kernel/sysctl.c 2
-rw-r--r-- kernel/time/timer.c 2
-rw-r--r-- kernel/trace/Kconfig 11
-rw-r--r-- kernel/trace/Makefile 1
-rw-r--r-- kernel/trace/ftrace.c 24
-rw-r--r-- kernel/trace/trace.c 14
-rw-r--r-- kernel/trace/trace_irqsoff.c 133
-rw-r--r-- kernel/trace/trace_selftest.c 2
27 files changed, 390 insertions, 441 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c97bce6a0e0e..eb759f5008b8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1044,7 +1044,8 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
}
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
+ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
+ BPF_CLASS(insn->code) == BPF_ALU64) {
verbose("BPF_END uses reserved fields\n");
return -EINVAL;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1656a48d5bee..a599351997ad 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1916,6 +1916,7 @@ static struct cftype files[] = {
{
.name = "memory_pressure",
.read_u64 = cpuset_read_u64,
+ .private = FILE_MEMORY_PRESSURE,
},
{
@@ -2299,6 +2300,13 @@ retry:
mutex_unlock(&cpuset_mutex);
}
+static bool force_rebuild;
+
+void cpuset_force_rebuild(void)
+{
+ force_rebuild = true;
+}
+
/**
* cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
*
@@ -2373,8 +2381,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
}
/* rebuild sched domains if cpus_allowed has changed */
- if (cpus_updated)
+ if (cpus_updated || force_rebuild) {
+ force_rebuild = false;
rebuild_sched_domains();
+ }
}
void cpuset_update_active_cpus(bool cpu_online)
@@ -2393,6 +2403,11 @@ void cpuset_update_active_cpus(bool cpu_online)
schedule_work(&cpuset_hotplug_work);
}
+void cpuset_wait_for_hotplug(void)
+{
+ flush_work(&cpuset_hotplug_work);
+}
+
/*
* Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
* Call this routine anytime after node_states[N_MEMORY] changes.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 98928fb7fecc..322f63370038 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7302,6 +7302,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
}
event->tp_event->prog = prog;
+ event->tp_event->bpf_prog_owner = event;
return 0;
}
@@ -7314,7 +7315,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
return;
prog = event->tp_event->prog;
- if (prog) {
+ if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
bpf_prog_put_rcu(prog);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cd0d68ee02..1d168ba55118 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -178,13 +178,13 @@ static inline void free_thread_stack(unsigned long *stack)
# else
static struct kmem_cache *thread_stack_cache;
-static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
int node)
{
return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}
-static void free_stack(unsigned long *stack)
+static void free_thread_stack(unsigned long *stack)
{
kmem_cache_free(thread_stack_cache, stack);
}
@@ -697,6 +697,26 @@ void __mmdrop(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(__mmdrop);
+static inline void __mmput(struct mm_struct *mm)
+{
+ VM_BUG_ON(atomic_read(&mm->mm_users));
+
+ uprobe_clear_state(mm);
+ exit_aio(mm);
+ ksm_exit(mm);
+ khugepaged_exit(mm); /* must run before exit_mmap */
+ exit_mmap(mm);
+ set_mm_exe_file(mm, NULL);
+ if (!list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ list_del(&mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
+ mmdrop(mm);
+}
+
/*
* Decrement the use count and release all resources for an mm.
*/
@@ -706,26 +726,27 @@ int mmput(struct mm_struct *mm)
might_sleep();
if (atomic_dec_and_test(&mm->mm_users)) {
- uprobe_clear_state(mm);
- exit_aio(mm);
- ksm_exit(mm);
- khugepaged_exit(mm); /* must run before exit_mmap */
- exit_mmap(mm);
- set_mm_exe_file(mm, NULL);
- if (!list_empty(&mm->mmlist)) {
- spin_lock(&mmlist_lock);
- list_del(&mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
- if (mm->binfmt)
- module_put(mm->binfmt->module);
- mmdrop(mm);
+ __mmput(mm);
mm_freed = 1;
}
return mm_freed;
}
EXPORT_SYMBOL_GPL(mmput);
+static void mmput_async_fn(struct work_struct *work)
+{
+ struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+ __mmput(mm);
+}
+
+void mmput_async(struct mm_struct *mm)
+{
+ if (atomic_dec_and_test(&mm->mm_users)) {
+ INIT_WORK(&mm->async_put_work, mmput_async_fn);
+ schedule_work(&mm->async_put_work);
+ }
+}
+
/**
* set_mm_exe_file - change a reference to the mm's executable file
*
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 7080ae1eb6c1..f850e906564b 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
}
EXPORT_SYMBOL(__gcov_merge_icall_topn);
+void __gcov_exit(void)
+{
+ /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
/**
* gcov_enable_events - enable event reporting through gcov_event()
*
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index e25e92fb44fa..46a18e72bce6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include "gcov.h"
-#if __GNUC__ == 5 && __GNUC_MINOR__ >= 1
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS 9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
#define GCOV_COUNTERS 10
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
#define GCOV_COUNTERS 9
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56618f6..0e2c4911ba61 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3128,10 +3128,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (depth) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
- if (hlock->references)
+ if (hlock->references) {
+ /*
+ * Check: unsigned int references:12, overflow.
+ */
+ if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+ return 0;
+
hlock->references++;
- else
+ } else {
hlock->references = 2;
+ }
return 1;
}
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919d63b2..d580b7d6ee6d 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -776,6 +776,8 @@ static void lock_torture_cleanup(void)
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+ kfree(cxt.lwsa);
+ kfree(cxt.lrsa);
torture_cleanup_end();
}
@@ -917,6 +919,8 @@ static int __init lock_torture_init(void)
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
+ kfree(writer_tasks);
+ writer_tasks = NULL;
firsterr = -ENOMEM;
goto unwind;
}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e7f1f736a5b6..cc177142a08f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -19,8 +19,9 @@
#include <linux/kmod.h>
#include <trace/events/power.h>
#include <linux/wakeup_reason.h>
+#include <linux/cpuset.h>
-/*
+/*
* Timeout for stopping processes
*/
unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
@@ -208,6 +209,8 @@ void thaw_processes(void)
__usermodehelper_set_disable_depth(UMH_FREEZING);
thaw_workqueues();
+ cpuset_wait_for_hotplug();
+
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
/* No other threads should have PF_SUSPEND_TASK set */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ba183e7987c..3decfbc88308 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -757,6 +757,12 @@ void rcu_irq_exit(void)
local_irq_save(flags);
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -885,6 +891,12 @@ void rcu_irq_enter(void)
local_irq_save(flags);
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2dbe599d34d5..c408280ddd12 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1372,7 +1372,9 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, cpu);
+ p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(dst_rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(dst_rq, p, 0);
@@ -3176,20 +3178,20 @@ static void sched_freq_tick_pelt(int cpu)
#ifdef CONFIG_SCHED_WALT
static void sched_freq_tick_walt(int cpu)
{
- unsigned long cpu_utilization = cpu_util(cpu);
+ unsigned long cpu_utilization = cpu_util_freq(cpu);
unsigned long capacity_curr = capacity_curr_of(cpu);
if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
return sched_freq_tick_pelt(cpu);
/*
- * Add a margin to the WALT utilization.
+ * Add a margin to the WALT utilization to check if we will need to
+ * increase frequency.
* NOTE: WALT tracks a single CPU signal for all the scheduling
* classes, thus this margin is going to be added to the DL class as
* well, which is something we do not do in sched_freq_tick_pelt case.
*/
- cpu_utilization = add_capacity_margin(cpu_utilization);
- if (cpu_utilization <= capacity_curr)
+ if (add_capacity_margin(cpu_utilization) <= capacity_curr)
return;
/*
@@ -3207,16 +3209,9 @@ static void sched_freq_tick_walt(int cpu)
static void sched_freq_tick(int cpu)
{
- unsigned long capacity_orig, capacity_curr;
-
if (!sched_freq())
return;
- capacity_orig = capacity_orig_of(cpu);
- capacity_curr = capacity_curr_of(cpu);
- if (capacity_curr == capacity_orig)
- return;
-
_sched_freq_tick(cpu);
}
#else
@@ -8188,17 +8183,16 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
* operation in the resume sequence, just build a single sched
* domain, ignoring cpusets.
*/
- num_cpus_frozen--;
- if (likely(num_cpus_frozen)) {
- partition_sched_domains(1, NULL, NULL);
+ partition_sched_domains(1, NULL, NULL);
+ if (--num_cpus_frozen)
break;
- }
/*
* This is the last CPU online operation. So fall through and
* restore the original sched domains by considering the
* cpuset configurations.
*/
+ cpuset_force_rebuild();
case CPU_ONLINE:
cpuset_update_active_cpus(true);
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
index f10d9f7d6d07..6ffb23adbcef 100644
--- a/kernel/sched/cpufreq_sched.c
+++ b/kernel/sched/cpufreq_sched.c
@@ -235,6 +235,18 @@ out:
cpufreq_cpu_put(policy);
}
+#ifdef CONFIG_SCHED_WALT
+static inline unsigned long
+requested_capacity(struct sched_capacity_reqs *scr)
+{
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ return scr->cfs;
+ return scr->cfs + scr->rt;
+}
+#else
+#define requested_capacity(scr) (scr->cfs + scr->rt)
+#endif
+
void update_cpu_capacity_request(int cpu, bool request)
{
unsigned long new_capacity;
@@ -245,7 +257,7 @@ void update_cpu_capacity_request(int cpu, bool request)
scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
- new_capacity = scr->cfs + scr->rt;
+ new_capacity = requested_capacity(scr);
new_capacity = new_capacity * capacity_margin
/ SCHED_CAPACITY_SCALE;
new_capacity += scr->dl;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e12309c1b07b..28977799017b 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -64,8 +64,9 @@ struct sugov_cpu {
struct update_util_data update_util;
struct sugov_policy *sg_policy;
- unsigned long iowait_boost;
- unsigned long iowait_boost_max;
+ bool iowait_boost_pending;
+ unsigned int iowait_boost;
+ unsigned int iowait_boost_max;
u64 last_update;
/* The fields below are only needed when sharing a policy. */
@@ -224,30 +225,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
unsigned int flags)
{
if (flags & SCHED_CPUFREQ_IOWAIT) {
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ if (sg_cpu->iowait_boost_pending)
+ return;
+
+ sg_cpu->iowait_boost_pending = true;
+
+ if (sg_cpu->iowait_boost) {
+ sg_cpu->iowait_boost <<= 1;
+ if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ } else {
+ sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ }
} else if (sg_cpu->iowait_boost) {
s64 delta_ns = time - sg_cpu->last_update;
/* Clear iowait_boost if the CPU apprears to have been idle. */
- if (delta_ns > TICK_NSEC)
+ if (delta_ns > TICK_NSEC) {
sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost_pending = false;
+ }
}
}
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
unsigned long *max)
{
- unsigned long boost_util = sg_cpu->iowait_boost;
- unsigned long boost_max = sg_cpu->iowait_boost_max;
+ unsigned int boost_util, boost_max;
- if (!boost_util)
+ if (!sg_cpu->iowait_boost)
return;
+ if (sg_cpu->iowait_boost_pending) {
+ sg_cpu->iowait_boost_pending = false;
+ } else {
+ sg_cpu->iowait_boost >>= 1;
+ if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ sg_cpu->iowait_boost = 0;
+ return;
+ }
+ }
+
+ boost_util = sg_cpu->iowait_boost;
+ boost_max = sg_cpu->iowait_boost_max;
+
if (*util * boost_max < *max * boost_util) {
*util = boost_util;
*max = boost_max;
}
- sg_cpu->iowait_boost >>= 1;
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -297,11 +322,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
sugov_update_commit(sg_policy, time, next_f);
}
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
- u64 last_freq_update_time = sg_policy->last_freq_update_time;
unsigned long util = 0, max = 1;
unsigned int j;
@@ -317,9 +341,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
* enough, don't take the CPU into account as it probably is
* idle now (and clear iowait_boost for it).
*/
- delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+ delta_ns = time - j_sg_cpu->last_update;
if (delta_ns > TICK_NSEC) {
j_sg_cpu->iowait_boost = 0;
+ j_sg_cpu->iowait_boost_pending = false;
continue;
}
if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
@@ -361,7 +386,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
if (flags & SCHED_CPUFREQ_DL)
next_f = sg_policy->policy->cpuinfo.max_freq;
else
- next_f = sugov_next_freq_shared(sg_cpu);
+ next_f = sugov_next_freq_shared(sg_cpu, time);
sugov_update_commit(sg_policy, time, next_f);
}
@@ -589,7 +614,6 @@ static int sugov_init(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy;
struct sugov_tunables *tunables;
- unsigned int lat;
int ret = 0;
/* State should be equivalent to EXIT */
@@ -628,12 +652,19 @@ static int sugov_init(struct cpufreq_policy *policy)
goto stop_kthread;
}
- tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
- tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
- lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
- if (lat) {
- tunables->up_rate_limit_us *= lat;
- tunables->down_rate_limit_us *= lat;
+ if (policy->up_transition_delay_us && policy->down_transition_delay_us) {
+ tunables->up_rate_limit_us = policy->up_transition_delay_us;
+ tunables->down_rate_limit_us = policy->down_transition_delay_us;
+ } else {
+ unsigned int lat;
+
+ tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
+ tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
+ lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+ if (lat) {
+ tunables->up_rate_limit_us *= lat;
+ tunables->down_rate_limit_us *= lat;
+ }
}
policy->governor_data = sg_policy;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a105e97ab6bf..167a1038cff0 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,8 @@
#include <linux/slab.h>
+#include "walt.h"
+
struct dl_bandwidth def_dl_bandwidth;
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -1623,7 +1625,9 @@ retry:
next_task->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, next_task, 0);
clear_average_bw(&next_task->dl, &rq->dl);
+ next_task->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(next_task, later_rq->cpu);
+ next_task->on_rq = TASK_ON_RQ_QUEUED;
add_average_bw(&next_task->dl, &later_rq->dl);
activate_task(later_rq, next_task, 0);
next_task->on_rq = TASK_ON_RQ_QUEUED;
@@ -1715,7 +1719,9 @@ static void pull_dl_task(struct rq *this_rq)
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
clear_average_bw(&p->dl, &src_rq->dl);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, this_cpu);
+ p->on_rq = TASK_ON_RQ_QUEUED;
add_average_bw(&p->dl, &this_rq->dl);
activate_task(this_rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 853064319b0d..6e3ab49c262a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5809,10 +5809,11 @@ static inline void hrtick_update(struct rq *rq)
#endif
#ifdef CONFIG_SMP
+static bool __cpu_overutilized(int cpu, int delta);
static bool cpu_overutilized(int cpu);
unsigned long boosted_cpu_util(int cpu);
#else
-#define boosted_cpu_util(cpu) cpu_util(cpu)
+#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED)
@@ -6626,10 +6627,8 @@ end:
*/
static int sched_group_energy(struct energy_env *eenv)
{
- struct sched_domain *sd;
- int cpu, total_energy = 0;
struct cpumask visit_cpus;
- struct sched_group *sg;
+ u64 total_energy = 0;
WARN_ON(!eenv->sg_top->sge);
@@ -6637,8 +6636,8 @@ static int sched_group_energy(struct energy_env *eenv)
while (!cpumask_empty(&visit_cpus)) {
struct sched_group *sg_shared_cap = NULL;
-
- cpu = cpumask_first(&visit_cpus);
+ int cpu = cpumask_first(&visit_cpus);
+ struct sched_domain *sd;
/*
* Is the group utilization affected by cpus outside this
@@ -6650,7 +6649,7 @@ static int sched_group_energy(struct energy_env *eenv)
sg_shared_cap = sd->parent->groups;
for_each_domain(cpu, sd) {
- sg = sd->groups;
+ struct sched_group *sg = sd->groups;
/* Has this sched_domain already been visited? */
if (sd->child && group_first_cpu(sg) != cpu)
@@ -6686,11 +6685,9 @@ static int sched_group_energy(struct energy_env *eenv)
idle_idx = group_idle_state(eenv, sg);
group_util = group_norm_util(eenv, sg);
- sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
- >> SCHED_CAPACITY_SHIFT;
+ sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power);
sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
- * sg->sge->idle_states[idle_idx].power)
- >> SCHED_CAPACITY_SHIFT;
+ * sg->sge->idle_states[idle_idx].power);
total_energy += sg_busy_energy + sg_idle_energy;
@@ -6715,7 +6712,7 @@ next_cpu:
continue;
}
- eenv->energy = total_energy;
+ eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;
return 0;
}
@@ -7004,9 +7001,14 @@ static inline bool task_fits_max(struct task_struct *p, int cpu)
return __task_fits(p, cpu, 0);
}
+static bool __cpu_overutilized(int cpu, int delta)
+{
+ return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin);
+}
+
static bool cpu_overutilized(int cpu)
{
- return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+ return __cpu_overutilized(cpu, 0);
}
#ifdef CONFIG_SCHED_TUNE
@@ -7085,7 +7087,7 @@ schedtune_task_margin(struct task_struct *task)
unsigned long
boosted_cpu_util(int cpu)
{
- unsigned long util = cpu_util(cpu);
+ unsigned long util = cpu_util_freq(cpu);
long margin = schedtune_cpu_margin(util, cpu);
trace_sched_boost_cpu(cpu, util, margin);
@@ -7384,9 +7386,6 @@ static int start_cpu(bool boosted)
{
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
- RCU_LOCKDEP_WARN(rcu_read_lock_sched_held(),
- "sched RCU must be held");
-
return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
}
@@ -7729,6 +7728,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
}
if (target_cpu != prev_cpu) {
+ int delta = 0;
struct energy_env eenv = {
.util_delta = task_util(p),
.src_cpu = prev_cpu,
@@ -7736,8 +7736,13 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
.task = p,
};
+
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ delta = task_util(p);
+#endif
/* Not enough spare capacity on previous cpu */
- if (cpu_overutilized(prev_cpu)) {
+ if (__cpu_overutilized(prev_cpu, delta)) {
schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap);
schedstat_inc(this_rq(), eas_stats.secb_insuff_cap);
goto unlock;
@@ -10268,6 +10273,7 @@ static int need_active_balance(struct lb_env *env)
if (energy_aware() &&
(capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+ ((capacity_orig_of(env->src_cpu) < capacity_orig_of(env->dst_cpu))) &&
env->src_rq->cfs.h_nr_running == 1 &&
cpu_overutilized(env->src_cpu) &&
!cpu_overutilized(env->dst_cpu)) {
@@ -11348,8 +11354,8 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
return true;
/* Do idle load balance if there have misfit task */
- if (energy_aware() && rq->misfit_task)
- return 1;
+ if (energy_aware())
+ return rq->misfit_task;
return (rq->nr_running >= 2);
}
@@ -11391,7 +11397,7 @@ static inline bool nohz_kick_needed(struct rq *rq, int *type)
#ifndef CONFIG_SCHED_HMP
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_busy, cpu));
- if (sd && !energy_aware()) {
+ if (sd) {
sgc = sd->groups->sgc;
nr_busy = atomic_read(&sgc->nr_busy_cpus);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ee095f4e7230..23b68b051cee 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2117,7 +2117,9 @@ retry:
next_task->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, next_task, 0);
+ next_task->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(next_task, lowest_rq->cpu);
+ next_task->on_rq = TASK_ON_RQ_QUEUED;
activate_task(lowest_rq, next_task, 0);
next_task->on_rq = TASK_ON_RQ_QUEUED;
ret = 1;
@@ -2373,7 +2375,9 @@ static void pull_rt_task(struct rq *this_rq)
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, this_cpu);
+ p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(this_rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 33bf0c07e757..c53970b5a8f0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2363,6 +2363,12 @@ static inline unsigned long __cpu_util(int cpu, int delta)
unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
unsigned long capacity = capacity_orig_of(cpu);
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ util = div64_u64(cpu_rq(cpu)->cumulative_runnable_avg,
+ walt_ravg_window >> SCHED_LOAD_SHIFT);
+#endif
+
delta += util;
if (delta < 0)
return 0;
@@ -2375,6 +2381,19 @@ static inline unsigned long cpu_util(int cpu)
return __cpu_util(cpu, 0);
}
+static inline unsigned long cpu_util_freq(int cpu)
+{
+ unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+ unsigned long capacity = capacity_orig_of(cpu);
+
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ util = div64_u64(cpu_rq(cpu)->prev_runnable_sum,
+ walt_ravg_window >> SCHED_LOAD_SHIFT);
+#endif
+ return (util >= capacity) ? capacity : util;
+}
+
#endif
#ifdef CONFIG_CPU_FREQ_GOV_SCHED
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 92c3aae8e056..441cba01bc04 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -20,7 +20,6 @@
*/
#include <linux/syscore_ops.h>
-#include <linux/cpufreq.h>
#include <trace/events/sched.h>
#include "sched.h"
#include "walt.h"
@@ -45,29 +44,6 @@ unsigned int sysctl_sched_walt_init_task_load_pct = 15;
/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
unsigned int __read_mostly walt_disabled = 0;
-static unsigned int max_possible_efficiency = 1024;
-static unsigned int min_possible_efficiency = 1024;
-
-/*
- * Maximum possible frequency across all cpus. Task demand and cpu
- * capacity (cpu_power) metrics are scaled in reference to it.
- */
-static unsigned int max_possible_freq = 1;
-
-/*
- * Minimum possible max_freq across all cpus. This will be same as
- * max_possible_freq on homogeneous systems and could be different from
- * max_possible_freq on heterogenous systems. min_max_freq is used to derive
- * capacity (cpu_power) of cpus.
- */
-static unsigned int min_max_freq = 1;
-
-static unsigned int max_load_scale_factor = 1024;
-static unsigned int max_possible_capacity = 1024;
-
-/* Mask of all CPUs that have max_possible_capacity */
-static cpumask_t mpc_mask = CPU_MASK_ALL;
-
/* Window size (in ns) */
__read_mostly unsigned int walt_ravg_window = 20000000;
@@ -111,8 +87,10 @@ walt_dec_cumulative_runnable_avg(struct rq *rq,
static void
fixup_cumulative_runnable_avg(struct rq *rq,
- struct task_struct *p, s64 task_load_delta)
+ struct task_struct *p, u64 new_task_load)
{
+ s64 task_load_delta = (s64)new_task_load - task_load(p);
+
rq->cumulative_runnable_avg += task_load_delta;
if ((s64)rq->cumulative_runnable_avg < 0)
panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
@@ -204,24 +182,16 @@ update_window_start(struct rq *rq, u64 wallclock)
rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
}
+/*
+ * Translate absolute delta time accounted on a CPU
+ * to a scale where 1024 is the capacity of the most
+ * capable CPU running at FMAX
+ */
static u64 scale_exec_time(u64 delta, struct rq *rq)
{
- unsigned int cur_freq = rq->cur_freq;
- int sf;
-
- if (unlikely(cur_freq > max_possible_freq))
- cur_freq = rq->max_possible_freq;
-
- /* round up div64 */
- delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
- max_possible_freq);
-
- sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency);
+ unsigned long capcurr = capacity_curr_of(cpu_of(rq));
- delta *= sf;
- delta >>= 10;
-
- return delta;
+ return (delta * capcurr) >> SCHED_CAPACITY_SHIFT;
}
static int cpu_is_waiting_on_io(struct rq *rq)
@@ -744,33 +714,6 @@ done:
p->ravg.mark_start = wallclock;
}
-unsigned long __weak arch_get_cpu_efficiency(int cpu)
-{
- return SCHED_LOAD_SCALE;
-}
-
-void walt_init_cpu_efficiency(void)
-{
- int i, efficiency;
- unsigned int max = 0, min = UINT_MAX;
-
- for_each_possible_cpu(i) {
- efficiency = arch_get_cpu_efficiency(i);
- cpu_rq(i)->efficiency = efficiency;
-
- if (efficiency > max)
- max = efficiency;
- if (efficiency < min)
- min = efficiency;
- }
-
- if (max)
- max_possible_efficiency = max;
-
- if (min)
- min_possible_efficiency = min;
-}
-
static void reset_task_stats(struct task_struct *p)
{
u32 sum = 0;
@@ -802,11 +745,11 @@ void walt_set_window_start(struct rq *rq)
int cpu = cpu_of(rq);
struct rq *sync_rq = cpu_rq(sync_cpu);
- if (rq->window_start)
+ if (likely(rq->window_start))
return;
if (cpu == sync_cpu) {
- rq->window_start = walt_ktime_clock();
+ rq->window_start = 1;
} else {
raw_spin_unlock(&rq->lock);
double_rq_lock(rq, sync_rq);
@@ -875,242 +818,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
double_rq_unlock(src_rq, dest_rq);
}
-/*
- * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
- * least efficient cpu gets capacity of 1024
- */
-static unsigned long capacity_scale_cpu_efficiency(int cpu)
-{
- return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency;
-}
-
-/*
- * Return 'capacity' of a cpu in reference to cpu with lowest max_freq
- * (min_max_freq), such that one with lowest max_freq gets capacity of 1024.
- */
-static unsigned long capacity_scale_cpu_freq(int cpu)
-{
- return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq;
-}
-
-/*
- * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so
- * that "most" efficient cpu gets a load_scale_factor of 1
- */
-static unsigned long load_scale_cpu_efficiency(int cpu)
-{
- return DIV_ROUND_UP(1024 * max_possible_efficiency,
- cpu_rq(cpu)->efficiency);
-}
-
-/*
- * Return load_scale_factor of a cpu in reference to cpu with best max_freq
- * (max_possible_freq), so that one with best max_freq gets a load_scale_factor
- * of 1.
- */
-static unsigned long load_scale_cpu_freq(int cpu)
-{
- return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq);
-}
-
-static int compute_capacity(int cpu)
-{
- int capacity = 1024;
-
- capacity *= capacity_scale_cpu_efficiency(cpu);
- capacity >>= 10;
-
- capacity *= capacity_scale_cpu_freq(cpu);
- capacity >>= 10;
-
- return capacity;
-}
-
-static int compute_load_scale_factor(int cpu)
-{
- int load_scale = 1024;
-
- /*
- * load_scale_factor accounts for the fact that task load
- * is in reference to "best" performing cpu. Task's load will need to be
- * scaled (up) by a factor to determine suitability to be placed on a
- * (little) cpu.
- */
- load_scale *= load_scale_cpu_efficiency(cpu);
- load_scale >>= 10;
-
- load_scale *= load_scale_cpu_freq(cpu);
- load_scale >>= 10;
-
- return load_scale;
-}
-
-static int cpufreq_notifier_policy(struct notifier_block *nb,
- unsigned long val, void *data)
-{
- struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
- int i, update_max = 0;
- u64 highest_mpc = 0, highest_mplsf = 0;
- const struct cpumask *cpus = policy->related_cpus;
- unsigned int orig_min_max_freq = min_max_freq;
- unsigned int orig_max_possible_freq = max_possible_freq;
- /* Initialized to policy->max in case policy->related_cpus is empty! */
- unsigned int orig_max_freq = policy->max;
-
- if (val != CPUFREQ_NOTIFY)
- return 0;
-
- for_each_cpu(i, policy->related_cpus) {
- cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
- policy->related_cpus);
- orig_max_freq = cpu_rq(i)->max_freq;
- cpu_rq(i)->min_freq = policy->min;
- cpu_rq(i)->max_freq = policy->max;
- cpu_rq(i)->cur_freq = policy->cur;
- cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
- }
-
- max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
- if (min_max_freq == 1)
- min_max_freq = UINT_MAX;
- min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
- BUG_ON(!min_max_freq);
- BUG_ON(!policy->max);
-
- /* Changes to policy other than max_freq don't require any updates */
- if (orig_max_freq == policy->max)
- return 0;
-
- /*
- * A changed min_max_freq or max_possible_freq (possible during bootup)
- * needs to trigger re-computation of load_scale_factor and capacity for
- * all possible cpus (even those offline). It also needs to trigger
- * re-computation of nr_big_task count on all online cpus.
- *
- * A changed rq->max_freq otoh needs to trigger re-computation of
- * load_scale_factor and capacity for just the cluster of cpus involved.
- * Since small task definition depends on max_load_scale_factor, a
- * changed load_scale_factor of one cluster could influence
- * classification of tasks in another cluster. Hence a changed
- * rq->max_freq will need to trigger re-computation of nr_big_task
- * count on all online cpus.
- *
- * While it should be sufficient for nr_big_tasks to be
- * re-computed for only online cpus, we have inadequate context
- * information here (in policy notifier) with regard to hotplug-safety
- * context in which notification is issued. As a result, we can't use
- * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is
- * fixed up to issue notification always in hotplug-safe context,
- * re-compute nr_big_task for all possible cpus.
- */
-
- if (orig_min_max_freq != min_max_freq ||
- orig_max_possible_freq != max_possible_freq) {
- cpus = cpu_possible_mask;
- update_max = 1;
- }
-
- /*
- * Changed load_scale_factor can trigger reclassification of tasks as
- * big or small. Make this change "atomic" so that tasks are accounted
- * properly due to changed load_scale_factor
- */
- for_each_cpu(i, cpus) {
- struct rq *rq = cpu_rq(i);
-
- rq->capacity = compute_capacity(i);
- rq->load_scale_factor = compute_load_scale_factor(i);
-
- if (update_max) {
- u64 mpc, mplsf;
-
- mpc = div_u64(((u64) rq->capacity) *
- rq->max_possible_freq, rq->max_freq);
- rq->max_possible_capacity = (int) mpc;
-
- mplsf = div_u64(((u64) rq->load_scale_factor) *
- rq->max_possible_freq, rq->max_freq);
-
- if (mpc > highest_mpc) {
- highest_mpc = mpc;
- cpumask_clear(&mpc_mask);
- cpumask_set_cpu(i, &mpc_mask);
- } else if (mpc == highest_mpc) {
- cpumask_set_cpu(i, &mpc_mask);
- }
-
- if (mplsf > highest_mplsf)
- highest_mplsf = mplsf;
- }
- }
-
- if (update_max) {
- max_possible_capacity = highest_mpc;
- max_load_scale_factor = highest_mplsf;
- }
-
- return 0;
-}
-
-static int cpufreq_notifier_trans(struct notifier_block *nb,
- unsigned long val, void *data)
-{
- struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
- unsigned int cpu = freq->cpu, new_freq = freq->new;
- unsigned long flags;
- int i;
-
- if (val != CPUFREQ_POSTCHANGE)
- return 0;
-
- BUG_ON(!new_freq);
-
- if (cpu_rq(cpu)->cur_freq == new_freq)
- return 0;
-
- for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) {
- struct rq *rq = cpu_rq(i);
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
- walt_ktime_clock(), 0);
- rq->cur_freq = new_freq;
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- }
-
- return 0;
-}
-
-static struct notifier_block notifier_policy_block = {
- .notifier_call = cpufreq_notifier_policy
-};
-
-static struct notifier_block notifier_trans_block = {
- .notifier_call = cpufreq_notifier_trans
-};
-
-static int register_sched_callback(void)
-{
- int ret;
-
- ret = cpufreq_register_notifier(&notifier_policy_block,
- CPUFREQ_POLICY_NOTIFIER);
-
- if (!ret)
- ret = cpufreq_register_notifier(&notifier_trans_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-
- return 0;
-}
-
-/*
- * cpufreq callbacks can be registered at core_initcall or later time.
- * Any registration done prior to that is "forgotten" by cpufreq. See
- * initialization of variable init_cpufreq_transition_notifier_list_called
- * for further information.
- */
-core_initcall(register_sched_callback);
-
void walt_init_new_task_load(struct task_struct *p)
{
int i;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 15a1795bbba1..efd384f3f852 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsigned int flags,
return 0;
}
+void __get_seccomp_filter(struct seccomp_filter *filter)
+{
+ /* Reference count is bounded by the number of total processes. */
+ atomic_inc(&filter->usage);
+}
+
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
struct seccomp_filter *orig = tsk->seccomp.filter;
if (!orig)
return;
- /* Reference count is bounded by the number of total processes. */
- atomic_inc(&orig->usage);
+ __get_seccomp_filter(orig);
}
static inline void seccomp_filter_free(struct seccomp_filter *filter)
@@ -475,10 +480,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter)
}
}
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
+static void __put_seccomp_filter(struct seccomp_filter *orig)
{
- struct seccomp_filter *orig = tsk->seccomp.filter;
/* Clean up single-reference branches iteratively. */
while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
@@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_struct *tsk)
}
}
+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+void put_seccomp_filter(struct task_struct *tsk)
+{
+ __put_seccomp_filter(tsk->seccomp.filter);
+}
+
/**
* seccomp_send_sigsys - signals the task to allow in-process syscall emulation
* @syscall: syscall number to send to userland
@@ -927,13 +936,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
if (!data)
goto out;
- get_seccomp_filter(task);
+ __get_seccomp_filter(filter);
spin_unlock_irq(&task->sighand->siglock);
if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
ret = -EFAULT;
- put_seccomp_filter(task);
+ __put_seccomp_filter(filter);
return ret;
out:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8576e6385d63..3fbe2765f307 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1413,6 +1413,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
+ .extra1 = &zero,
+ .extra2 = &one,
},
#endif
#ifdef CONFIG_BPF_SYSCALL
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 90a82deece45..903705687b52 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -131,7 +131,7 @@ int timer_migration_handler(struct ctl_table *table, int write,
int ret;
mutex_lock(&mutex);
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration(false);
mutex_unlock(&mutex);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 048bf074bef9..3c7b7a9bcad1 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -190,6 +190,17 @@ config FUNCTION_GRAPH_TRACER
address on the current task structure into a stack of calls.
+config PREEMPTIRQ_EVENTS
+ bool "Enable trace events for preempt and irq disable/enable"
+ select TRACE_IRQFLAGS
+ depends on DEBUG_PREEMPT || !PROVE_LOCKING
+ default n
+ help
+ Enable tracing of disable and enable events for preemption and irqs.
+ For tracing preempt disable/enable events, DEBUG_PREEMPT must be
+ enabled. For tracing irq disable/enable events, PROVE_LOCKING must
+ be disabled.
+
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2963266fb7bf..a0177ae43058 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
+obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eba904bae48c..fc0051fd672d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2667,13 +2667,14 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
if (!command || !ftrace_enabled) {
/*
- * If these are control ops, they still need their
- * per_cpu field freed. Since, function tracing is
+ * If these are dynamic or control ops, they still
+ * need their data freed. Since, function tracing is
* not currently active, we can just free them
* without synchronizing all CPUs.
*/
- if (ops->flags & FTRACE_OPS_FL_CONTROL)
- control_ops_free(ops);
+ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL))
+ goto free_ops;
+
return 0;
}
@@ -2728,6 +2729,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
schedule_on_each_cpu(ftrace_sync);
+ free_ops:
arch_ftrace_trampoline_free(ops);
if (ops->flags & FTRACE_OPS_FL_CONTROL)
@@ -4313,9 +4315,6 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
-static unsigned long save_global_trampoline;
-static unsigned long save_global_flags;
-
static int __init set_graph_function(char *str)
{
strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -5905,17 +5904,6 @@ void unregister_ftrace_graph(void)
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
-#ifdef CONFIG_DYNAMIC_FTRACE
- /*
- * Function graph does not allocate the trampoline, but
- * other global_ops do. We need to reset the ALLOC_TRAMP flag
- * if one was used.
- */
- global_ops.trampoline = save_global_trampoline;
- if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP)
- global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
-#endif
-
out:
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a579a874045b..6580ec6bc371 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3303,11 +3303,17 @@ static int tracing_open(struct inode *inode, struct file *file)
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
int cpu = tracing_get_cpu(inode);
+ struct trace_buffer *trace_buf = &tr->trace_buffer;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->current_trace->print_max)
+ trace_buf = &tr->max_buffer;
+#endif
if (cpu == RING_BUFFER_ALL_CPUS)
- tracing_reset_online_cpus(&tr->trace_buffer);
+ tracing_reset_online_cpus(trace_buf);
else
- tracing_reset(&tr->trace_buffer, cpu);
+ tracing_reset(trace_buf, cpu);
}
if (file->f_mode & FMODE_READ) {
@@ -4858,7 +4864,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_on() && iter->pos)
+ if (!tracer_tracing_is_on(iter->tr) && iter->pos)
break;
mutex_unlock(&iter->mutex);
@@ -5397,7 +5403,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
- if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
+ if (tr->max_buffer.buffer)
ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
tracing_reset_online_cpus(&tr->max_buffer);
#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index be3222b7d72e..21b162c07e83 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -16,6 +16,10 @@
#include "trace.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/preemptirq.h>
+
+#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
@@ -451,63 +455,43 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
#else /* !CONFIG_PROVE_LOCKING */
/*
- * Stubs:
- */
-
-void trace_softirqs_on(unsigned long ip)
-{
-}
-
-void trace_softirqs_off(unsigned long ip)
-{
-}
-
-inline void print_irqtrace_events(struct task_struct *curr)
-{
-}
-
-/*
* We are only interested in hardirq on/off events:
*/
-void trace_hardirqs_on(void)
+static inline void tracer_hardirqs_on(void)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
-EXPORT_SYMBOL(trace_hardirqs_on);
-void trace_hardirqs_off(void)
+static inline void tracer_hardirqs_off(void)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
-EXPORT_SYMBOL(trace_hardirqs_off);
-__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+static inline void tracer_hardirqs_on_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+static inline void tracer_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_PROVE_LOCKING */
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
-void trace_preempt_on(unsigned long a0, unsigned long a1)
+static inline void tracer_preempt_on(unsigned long a0, unsigned long a1)
{
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
-void trace_preempt_off(unsigned long a0, unsigned long a1)
+static inline void tracer_preempt_off(unsigned long a0, unsigned long a1)
{
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
@@ -770,3 +754,100 @@ __init static int init_irqsoff_tracer(void)
return 0;
}
core_initcall(init_irqsoff_tracer);
+#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */
+
+#ifndef CONFIG_IRQSOFF_TRACER
+static inline void tracer_hardirqs_on(void) { }
+static inline void tracer_hardirqs_off(void) { }
+static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { }
+static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { }
+#endif
+
+#ifndef CONFIG_PREEMPT_TRACER
+static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
+static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
+#endif
+
+/* Per-cpu variable to prevent redundant calls when IRQs already off */
+static DEFINE_PER_CPU(int, tracing_irq_cpu);
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
+void trace_hardirqs_on(void)
+{
+ if (!this_cpu_read(tracing_irq_cpu))
+ return;
+
+ trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+ tracer_hardirqs_on();
+
+ this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+ if (this_cpu_read(tracing_irq_cpu))
+ return;
+
+ this_cpu_write(tracing_irq_cpu, 1);
+
+ trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+ tracer_hardirqs_off();
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+ if (!this_cpu_read(tracing_irq_cpu))
+ return;
+
+ trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
+ tracer_hardirqs_on_caller(caller_addr);
+
+ this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+ if (this_cpu_read(tracing_irq_cpu))
+ return;
+
+ this_cpu_write(tracing_irq_cpu, 1);
+
+ trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+ tracer_hardirqs_off_caller(caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+/*
+ * Stubs:
+ */
+
+void trace_softirqs_on(unsigned long ip)
+{
+}
+
+void trace_softirqs_off(unsigned long ip)
+{
+}
+
+inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+#endif
+
+#if defined(CONFIG_PREEMPT_TRACER) || \
+ (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+ trace_preempt_enable_rcuidle(a0, a1);
+ tracer_preempt_on(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+ trace_preempt_disable_rcuidle(a0, a1);
+ tracer_preempt_off(a0, a1);
+}
+#endif
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b0f86ea77881..ca70d11b8aa7 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -272,7 +272,7 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt)
goto out_free;
if (cnt > 1) {
if (trace_selftest_test_global_cnt == 0)
- goto out;
+ goto out_free;
}
if (trace_selftest_test_dyn_cnt == 0)
goto out_free;