summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c8
-rw-r--r--kernel/audit_watch.c8
-rw-r--r--kernel/auditsc.c12
-rw-r--r--kernel/capability.c36
-rw-r--r--kernel/cgroup.c110
-rw-r--r--kernel/configs/tiny.config8
-rw-r--r--kernel/cpu.c23
-rw-r--r--kernel/cpuset.c25
-rw-r--r--kernel/debug/debug_core.c4
-rw-r--r--kernel/events/core.c65
-rw-r--r--kernel/fork.c42
-rw-r--r--kernel/irq/generic-chip.c21
-rw-r--r--kernel/irq/msi.c1
-rw-r--r--kernel/jump_label.c7
-rw-r--r--kernel/kexec_file.c3
-rw-r--r--kernel/locking/rtmutex.c68
-rw-r--r--kernel/locking/rtmutex_common.h5
-rw-r--r--kernel/memremap.c4
-rw-r--r--kernel/power/hibernate.c4
-rw-r--r--kernel/power/main.c17
-rw-r--r--kernel/power/power.h9
-rw-r--r--kernel/power/snapshot.c10
-rw-r--r--kernel/power/suspend_test.c4
-rw-r--r--kernel/power/swap.c18
-rw-r--r--kernel/printk/braille.c4
-rw-r--r--kernel/ptrace.c28
-rw-r--r--kernel/rcu/tree.c12
-rw-r--r--kernel/rcu/tree_plugin.h1
-rw-r--r--kernel/sched/core.c62
-rw-r--r--kernel/sched/core_ctl.c55
-rw-r--r--kernel/sched/cpufreq_sched.c4
-rw-r--r--kernel/sched/fair.c20
-rw-r--r--kernel/sched/features.h4
-rw-r--r--kernel/sched/hmp.c4
-rw-r--r--kernel/sched/rt.c86
-rw-r--r--kernel/sched/sched.h38
-rw-r--r--kernel/sched/tune.c2
-rw-r--r--kernel/sched/walt.c52
-rw-r--r--kernel/sysctl.c46
-rw-r--r--kernel/time/alarmtimer.c8
-rw-r--r--kernel/time/clocksource.c48
-rw-r--r--kernel/time/tick-broadcast.c3
-rw-r--r--kernel/time/timekeeping.c61
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/trace.c31
-rw-r--r--kernel/trace/trace_functions_graph.c17
-rw-r--r--kernel/watchdog.c15
47 files changed, 720 insertions, 397 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 5ffcbd354a52..34f690b9213a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -870,6 +870,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
if (s.mask & AUDIT_STATUS_PID) {
+ /* NOTE: we are using task_tgid_vnr() below because
+ * the s.pid value is relative to the namespace
+ * of the caller; at present this doesn't matter
+ * much since you can really only run auditd
+ * from the initial pid namespace, but something
+ * to keep in mind if this changes */
int new_pid = s.pid;
if ((!new_pid) && (task_tgid_vnr(current) != audit_pid))
@@ -1896,7 +1902,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
" euid=%u suid=%u fsuid=%u"
" egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
task_ppid_nr(tsk),
- task_pid_nr(tsk),
+ task_tgid_nr(tsk),
from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
from_kuid(&init_user_ns, cred->uid),
from_kgid(&init_user_ns, cred->gid),
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 656c7e93ac0d..939945a5649c 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -19,6 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/audit.h>
#include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
unsigned long ino;
dev_t dev;
- rcu_read_lock();
- exe_file = rcu_dereference(tsk->mm->exe_file);
+ exe_file = get_task_exe_file(tsk);
+ if (!exe_file)
+ return 0;
ino = exe_file->f_inode->i_ino;
dev = exe_file->f_inode->i_sb->s_dev;
- rcu_read_unlock();
+ fput(exe_file);
return audit_mark_compare(mark, ino, dev);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 48f45987dc6c..63f0e495f517 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -458,7 +458,7 @@ static int audit_filter_rules(struct task_struct *tsk,
switch (f->type) {
case AUDIT_PID:
- pid = task_pid_nr(tsk);
+ pid = task_tgid_nr(tsk);
result = audit_comparator(pid, f->op, f->val);
break;
case AUDIT_PPID:
@@ -1987,7 +1987,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
if (!ab)
return;
- audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
+ audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
audit_log_task_context(ab);
audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d",
oldloginuid, loginuid, oldsessionid, sessionid, !rc);
@@ -2212,7 +2212,7 @@ void __audit_ptrace(struct task_struct *t)
{
struct audit_context *context = current->audit_context;
- context->target_pid = task_pid_nr(t);
+ context->target_pid = task_tgid_nr(t);
context->target_auid = audit_get_loginuid(t);
context->target_uid = task_uid(t);
context->target_sessionid = audit_get_sessionid(t);
@@ -2237,7 +2237,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
if (audit_pid && t->tgid == audit_pid) {
if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- audit_sig_pid = task_pid_nr(tsk);
+ audit_sig_pid = task_tgid_nr(tsk);
if (uid_valid(tsk->loginuid))
audit_sig_uid = tsk->loginuid;
else
@@ -2337,7 +2337,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
void __audit_log_capset(const struct cred *new, const struct cred *old)
{
struct audit_context *context = current->audit_context;
- context->capset.pid = task_pid_nr(current);
+ context->capset.pid = task_tgid_nr(current);
context->capset.cap.effective = new->cap_effective;
context->capset.cap.inheritable = new->cap_effective;
context->capset.cap.permitted = new->cap_permitted;
@@ -2369,7 +2369,7 @@ static void audit_log_task(struct audit_buffer *ab)
from_kgid(&init_user_ns, gid),
sessionid);
audit_log_task_context(ab);
- audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+ audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
audit_log_untrustedstring(ab, get_task_comm(comm, current));
audit_log_d_path_exe(ab, current->mm);
}
diff --git a/kernel/capability.c b/kernel/capability.c
index 00411c82dac5..4984e1f552eb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
EXPORT_SYMBOL(file_ns_capable);
/**
+ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
+ * @ns: The user namespace in question
+ * @inode: The inode in question
+ *
+ * Return true if the inode uid and gid are within the namespace.
+ */
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
+{
+ return kuid_has_mapping(ns, inode->i_uid) &&
+ kgid_has_mapping(ns, inode->i_gid);
+}
+
+/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
* @inode: The inode in question
* @cap: The capability in question
@@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
- return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
- kgid_has_mapping(ns, inode->i_gid);
+ return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
}
EXPORT_SYMBOL(capable_wrt_inode_uidgid);
+
+/**
+ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
+ * @tsk: The task that may be ptraced
+ * @ns: The user namespace to search for CAP_SYS_PTRACE in
+ *
+ * Return true if the task that is ptracing @tsk had CAP_SYS_PTRACE in the
+ * specified user namespace, or if @tsk has no recorded tracer credentials.
+ */
+bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
+{
+ int ret = 0; /* An absent tracer adds no restrictions */
+ const struct cred *cred;
+ rcu_read_lock();
+ cred = rcu_dereference(tsk->ptracer_cred);
+ if (cred)
+ ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+ rcu_read_unlock();
+ return (ret == 0);
+}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e94c3c189338..b05fc202b548 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -211,6 +211,7 @@ static unsigned long have_free_callback __read_mostly;
/* Ditto for the can_fork callback. */
static unsigned long have_canfork_callback __read_mostly;
+static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_dfl_base_files[];
static struct cftype cgroup_legacy_base_files[];
@@ -236,6 +237,9 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
*/
static bool cgroup_ssid_enabled(int ssid)
{
+ if (CGROUP_SUBSYS_COUNT == 0)
+ return false;
+
return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}
@@ -1649,10 +1653,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
all_ss = true;
continue;
}
- if (!strcmp(token, "__DEVEL__sane_behavior")) {
- opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
- continue;
- }
if (!strcmp(token, "noprefix")) {
opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
@@ -1719,15 +1719,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
return -ENOENT;
}
- if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
- pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
- if (nr_opts != 1) {
- pr_err("sane_behavior: no other mount options allowed\n");
- return -EINVAL;
- }
- return 0;
- }
-
/*
* If the 'all' option was specified select all the subsystems,
* otherwise if 'none', 'name=' and a subsystem name options were
@@ -2010,6 +2001,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
+ bool is_v2 = fs_type == &cgroup2_fs_type;
struct super_block *pinned_sb = NULL;
struct cgroup_subsys *ss;
struct cgroup_root *root = NULL;
@@ -2026,6 +2018,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (!use_task_css_set_links)
cgroup_enable_task_cg_lists();
+ if (is_v2) {
+ if (data) {
+ pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
+ return ERR_PTR(-EINVAL);
+ }
+ cgrp_dfl_root_visible = true;
+ root = &cgrp_dfl_root;
+ cgroup_get(&root->cgrp);
+ goto out_mount;
+ }
+
mutex_lock(&cgroup_mutex);
/* First find the desired set of subsystems */
@@ -2033,15 +2036,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (ret)
goto out_unlock;
- /* look for a matching existing root */
- if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) {
- cgrp_dfl_root_visible = true;
- root = &cgrp_dfl_root;
- cgroup_get(&root->cgrp);
- ret = 0;
- goto out_unlock;
- }
-
/*
* Destruction of cgroup root is asynchronous, so subsystems may
* still be dying after the previous unmount. Let's drain the
@@ -2152,9 +2146,10 @@ out_free:
if (ret)
return ERR_PTR(ret);
-
+out_mount:
dentry = kernfs_mount(fs_type, flags, root->kf_root,
- CGROUP_SUPER_MAGIC, &new_sb);
+ is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
+ &new_sb);
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
@@ -2197,6 +2192,12 @@ static struct file_system_type cgroup_fs_type = {
.kill_sb = cgroup_kill_sb,
};
+static struct file_system_type cgroup2_fs_type = {
+ .name = "cgroup2",
+ .mount = cgroup_mount,
+ .kill_sb = cgroup_kill_sb,
+};
+
/**
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
@@ -2677,45 +2678,6 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
return ret;
}
-int subsys_cgroup_allow_attach(struct cgroup_taskset *tset)
-{
- const struct cred *cred = current_cred(), *tcred;
- struct task_struct *task;
- struct cgroup_subsys_state *css;
-
- if (capable(CAP_SYS_NICE))
- return 0;
-
- cgroup_taskset_for_each(task, css, tset) {
- tcred = __task_cred(task);
-
- if (current != task && !uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid))
- return -EACCES;
- }
-
- return 0;
-}
-
-static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-{
- struct cgroup_subsys_state *css;
- int i;
- int ret;
-
- for_each_css(css, i, cgrp) {
- if (css->ss->allow_attach) {
- ret = css->ss->allow_attach(tset);
- if (ret)
- return ret;
- } else {
- return -EACCES;
- }
- }
-
- return 0;
-}
-
static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *dst_cgrp,
struct kernfs_open_file *of)
@@ -2730,24 +2692,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid)) {
- /*
- * if the default permission check fails, give each
- * cgroup a chance to extend the permission check
- */
- struct cgroup_taskset tset = {
- .src_csets = LIST_HEAD_INIT(tset.src_csets),
- .dst_csets = LIST_HEAD_INIT(tset.dst_csets),
- .csets = &tset.src_csets,
- };
- struct css_set *cset;
- cset = task_css_set(task);
- list_add(&cset->mg_node, &tset.src_csets);
- ret = cgroup_allow_attach(dst_cgrp, &tset);
- list_del(&tset.src_csets);
- if (ret)
- ret = -EACCES;
- }
+ !uid_eq(cred->euid, tcred->suid) &&
+ !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
+ ret = -EACCES;
if (!ret && cgroup_on_dfl(dst_cgrp)) {
struct super_block *sb = of->file->f_path.dentry->d_sb;
@@ -5447,6 +5394,7 @@ int __init cgroup_init(void)
WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
WARN_ON(register_filesystem(&cgroup_fs_type));
+ WARN_ON(register_filesystem(&cgroup2_fs_type));
WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
return 0;
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index c2de56ab0fce..7fa0c4ae6394 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
CONFIG_SLOB=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 25cfcc804077..8b6940755e4a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -185,10 +185,17 @@ void cpu_hotplug_disable(void)
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
+static void __cpu_hotplug_enable(void)
+{
+ if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+ return;
+ cpu_hotplug_disabled--;
+}
+
void cpu_hotplug_enable(void)
{
cpu_maps_update_begin();
- WARN_ON(--cpu_hotplug_disabled < 0);
+ __cpu_hotplug_enable();
cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@ -225,12 +232,6 @@ static int cpu_notify(unsigned long val, void *v)
return __cpu_notify(val, v, -1, NULL);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void cpu_notify_nofail(unsigned long val, void *v)
-{
- BUG_ON(cpu_notify(val, v));
-}
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
@@ -248,6 +249,12 @@ void __unregister_cpu_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(__unregister_cpu_notifier);
+#ifdef CONFIG_HOTPLUG_CPU
+static void cpu_notify_nofail(unsigned long val, void *v)
+{
+ BUG_ON(cpu_notify(val, v));
+}
+
/**
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
* @cpu: a CPU id
@@ -616,7 +623,7 @@ void enable_nonboot_cpus(void)
/* Allow everyone to use the CPU hotplug again */
cpu_maps_update_begin();
- WARN_ON(--cpu_hotplug_disabled < 0);
+ __cpu_hotplug_enable();
if (cpumask_empty(frozen_cpus))
goto out;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e3c0f38acbe6..29c7240172d3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2095,21 +2095,18 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
mutex_unlock(&cpuset_mutex);
}
-static int cpuset_allow_attach(struct cgroup_taskset *tset)
+/*
+ * Make sure the new task conforms to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+void cpuset_fork(struct task_struct *task, void *priv)
{
- const struct cred *cred = current_cred(), *tcred;
- struct task_struct *task;
- struct cgroup_subsys_state *css;
-
- cgroup_taskset_for_each(task, css, tset) {
- tcred = __task_cred(task);
-
- if ((current != task) && !capable(CAP_SYS_ADMIN) &&
- cred->euid.val != tcred->uid.val && cred->euid.val != tcred->suid.val)
- return -EACCES;
- }
+ if (task_css_is_root(task, cpuset_cgrp_id))
+ return;
- return 0;
+ set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ task->mems_allowed = current->mems_allowed;
}
struct cgroup_subsys cpuset_cgrp_subsys = {
@@ -2118,11 +2115,11 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.css_offline = cpuset_css_offline,
.css_free = cpuset_css_free,
.can_attach = cpuset_can_attach,
- .allow_attach = cpuset_allow_attach,
.cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .fork = cpuset_fork,
.legacy_cftypes = files,
.early_init = 1,
};
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0874e2edd275..79517e5549f1 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -598,11 +598,11 @@ return_normal:
/*
* Wait for the other CPUs to be notified and be waiting for us:
*/
- time_left = loops_per_jiffy * HZ;
+ time_left = MSEC_PER_SEC;
while (kgdb_do_roundup && --time_left &&
(atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
online_cpus)
- cpu_relax();
+ udelay(1000);
if (!time_left)
pr_crit("Timed out waiting for secondary CPUs.\n");
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 424961e5bd80..f9c6f554460e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1554,12 +1554,33 @@ static int __init perf_workqueue_init(void)
core_initcall(perf_workqueue_init);
-static inline int pmu_filter_match(struct perf_event *event)
+static inline int __pmu_filter_match(struct perf_event *event)
{
struct pmu *pmu = event->pmu;
return pmu->filter_match ? pmu->filter_match(event) : 1;
}
+/*
+ * Check whether we should attempt to schedule an event group based on
+ * PMU-specific filtering. An event group can consist of HW and SW events,
+ * potentially with a SW leader, so we must check all the filters, to
+ * determine whether a group is schedulable:
+ */
+static inline int pmu_filter_match(struct perf_event *event)
+{
+ struct perf_event *child;
+
+ if (!__pmu_filter_match(event))
+ return 0;
+
+ list_for_each_entry(child, &event->sibling_list, group_entry) {
+ if (!__pmu_filter_match(child))
+ return 0;
+ }
+
+ return 1;
+}
+
static inline int
event_filter_match(struct perf_event *event)
{
@@ -6203,6 +6224,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
char *buf = NULL;
char *name;
+ if (vma->vm_flags & VM_READ)
+ prot |= PROT_READ;
+ if (vma->vm_flags & VM_WRITE)
+ prot |= PROT_WRITE;
+ if (vma->vm_flags & VM_EXEC)
+ prot |= PROT_EXEC;
+
+ if (vma->vm_flags & VM_MAYSHARE)
+ flags = MAP_SHARED;
+ else
+ flags = MAP_PRIVATE;
+
+ if (vma->vm_flags & VM_DENYWRITE)
+ flags |= MAP_DENYWRITE;
+ if (vma->vm_flags & VM_MAYEXEC)
+ flags |= MAP_EXECUTABLE;
+ if (vma->vm_flags & VM_LOCKED)
+ flags |= MAP_LOCKED;
+ if (vma->vm_flags & VM_HUGETLB)
+ flags |= MAP_HUGETLB;
+
if (file) {
struct inode *inode;
dev_t dev;
@@ -6229,27 +6271,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
maj = MAJOR(dev);
min = MINOR(dev);
- if (vma->vm_flags & VM_READ)
- prot |= PROT_READ;
- if (vma->vm_flags & VM_WRITE)
- prot |= PROT_WRITE;
- if (vma->vm_flags & VM_EXEC)
- prot |= PROT_EXEC;
-
- if (vma->vm_flags & VM_MAYSHARE)
- flags = MAP_SHARED;
- else
- flags = MAP_PRIVATE;
-
- if (vma->vm_flags & VM_DENYWRITE)
- flags |= MAP_DENYWRITE;
- if (vma->vm_flags & VM_MAYEXEC)
- flags |= MAP_EXECUTABLE;
- if (vma->vm_flags & VM_LOCKED)
- flags |= MAP_LOCKED;
- if (vma->vm_flags & VM_HUGETLB)
- flags |= MAP_HUGETLB;
-
goto got_name;
} else {
if (vma->vm_ops && vma->vm_ops->name) {
diff --git a/kernel/fork.c b/kernel/fork.c
index a46ce4505066..75573eeb49b2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -587,7 +587,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+ struct user_namespace *user_ns)
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
@@ -627,6 +628,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
if (init_new_context(p, mm))
goto fail_nocontext;
+ mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
@@ -672,7 +674,7 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- return mm_init(mm, current);
+ return mm_init(mm, current, current_user_ns());
}
/*
@@ -687,6 +689,7 @@ void __mmdrop(struct mm_struct *mm)
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
+ put_user_ns(mm->user_ns);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -769,6 +772,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
EXPORT_SYMBOL(get_mm_exe_file);
/**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+ struct file *exe_file = NULL;
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm) {
+ if (!(task->flags & PF_KTHREAD))
+ exe_file = get_mm_exe_file(mm);
+ }
+ task_unlock(task);
+ return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
* get_task_mm - acquire a reference to the task's mm
*
* Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
@@ -884,14 +910,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
deactivate_mm(tsk, mm);
/*
- * If we're exiting normally, clear a user-space tid field if
- * requested. We leave this alone when dying by signal, to leave
- * the value intact in a core dump, and to save the unnecessary
- * trouble, say, a killed vfork parent shouldn't touch this mm.
- * Userland only wants this done for a sys_exit.
+ * Signal userspace if we're not exiting with a core dump
+ * because we want to leave the value intact for debugging
+ * purposes.
*/
if (tsk->clear_child_tid) {
- if (!(tsk->flags & PF_SIGNALED) &&
+ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
@@ -927,7 +951,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm, tsk))
+ if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
err = dup_mmap(mm, oldmm);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index abd286afbd27..a4775f3451b9 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -411,8 +411,29 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
}
EXPORT_SYMBOL_GPL(irq_map_generic_chip);
+static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+{
+ struct irq_data *data = irq_domain_get_irq_data(d, virq);
+ struct irq_domain_chip_generic *dgc = d->gc;
+ unsigned int hw_irq = data->hwirq;
+ struct irq_chip_generic *gc;
+ int irq_idx;
+
+ gc = irq_get_domain_generic_chip(d, hw_irq);
+ if (!gc)
+ return;
+
+ irq_idx = hw_irq % dgc->irqs_per_chip;
+
+ clear_bit(irq_idx, &gc->installed);
+ irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL,
+ NULL);
+
+}
+
struct irq_domain_ops irq_generic_chip_ops = {
.map = irq_map_generic_chip,
+ .unmap = irq_unmap_generic_chip,
.xlate = irq_domain_xlate_onetwocell,
};
EXPORT_SYMBOL_GPL(irq_generic_chip_ops);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 4b21779d5163..cd6009006510 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -298,6 +298,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
ops->msi_finish(&arg, 0);
for_each_msi_entry(desc, dev) {
+ virq = desc->irq;
if (desc->nvec_used == 1)
dev_dbg(dev, "irq %d for MSI\n", virq);
else
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 4b353e0be121..453ec4232852 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -138,6 +138,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key)
}
EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
+void static_key_deferred_flush(struct static_key_deferred *key)
+{
+ STATIC_KEY_CHECK_USE();
+ flush_delayed_work(&key->work);
+}
+EXPORT_SYMBOL_GPL(static_key_deferred_flush);
+
void jump_label_rate_limit(struct static_key_deferred *key,
unsigned long rl)
{
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b70ada0028d2..6030efd4a188 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -934,7 +934,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
return 0;
out:
vfree(pi->sechdrs);
+ pi->sechdrs = NULL;
+
vfree(pi->purgatory_buf);
+ pi->purgatory_buf = NULL;
return ret;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75dd9c0..b066724d7a5b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
- if (!rt_mutex_has_waiters(lock))
- clear_rt_mutex_waiters(lock);
+ unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+ if (rt_mutex_has_waiters(lock))
+ return;
+
+ /*
+ * The rbtree has no waiters enqueued, now make sure that the
+ * lock->owner still has the waiters bit set, otherwise the
+ * following can happen:
+ *
+ * CPU 0 CPU 1 CPU2
+ * l->owner=T1
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T2)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ *
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T3)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ * signal(->T2) signal(->T3)
+ * lock(l->lock)
+ * dequeue(T2)
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * dequeue(T3)
+ * ==> wait list is empty
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * l->owner = owner
+ * owner = l->owner & ~HAS_WAITERS;
+ * ==> l->owner = T1
+ * }
+ * lock(l->lock)
+ * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * owner = l->owner & ~HAS_WAITERS;
+ * cmpxchg(l->owner, T1, NULL)
+ * ===> Success (l->owner = NULL)
+ *
+ * l->owner = owner
+ * ==> l->owner = T1
+ * }
+ *
+ * With the check for the waiter bit in place T3 on CPU2 will not
+ * overwrite. All tasks fiddling with the waiters bit are
+ * serialized by l->lock, so nothing else can modify the waiters
+ * bit. If the bit is set then nothing can change l->owner either
+ * so the simple RMW is safe. The cmpxchg() will simply fail if it
+ * happens in the middle of the RMW because the waiters bit is
+ * still set.
+ */
+ owner = READ_ONCE(*p);
+ if (owner & RT_MUTEX_HAS_WAITERS)
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
/*
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 4f5f83c7d2d3..e317e1cbb3eb 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p)
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
+ unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+ return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
}
/*
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 25ced161ebeb..f719c925cb54 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -159,7 +159,9 @@ static void devm_memremap_pages_release(struct device *dev, void *res)
struct page_map *page_map = res;
/* pages are dead and unused, undo the arch mapping */
+ mem_hotplug_begin();
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
+ mem_hotplug_done();
}
void *devm_memremap_pages(struct device *dev, struct resource *res)
@@ -189,7 +191,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
if (nid < 0)
nid = numa_mem_id();
+ mem_hotplug_begin();
error = arch_add_memory(nid, res->start, resource_size(res), true);
+ mem_hotplug_done();
if (error) {
devres_free(page_map);
return ERR_PTR(error);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index b7dd5718836e..3124cebaec31 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -299,12 +299,12 @@ static int create_image(int platform_mode)
save_processor_state();
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
error = swsusp_arch_suspend();
+ /* Restore control flow magically appears here */
+ restore_processor_state();
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
if (error)
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
error);
- /* Restore control flow magically appears here */
- restore_processor_state();
if (!in_suspend)
events_check_enabled = false;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b2dd4d999900..27946975eff0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -280,13 +280,7 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA;
}
-static ssize_t pm_wakeup_irq_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t n)
-{
- return -EINVAL;
-}
-power_attr(pm_wakeup_irq);
+power_attr_ro(pm_wakeup_irq);
#else /* !CONFIG_PM_SLEEP_DEBUG */
static inline void pm_print_times_init(void) {}
@@ -564,14 +558,7 @@ static ssize_t pm_trace_dev_match_show(struct kobject *kobj,
return show_trace_dev_match(buf, PAGE_SIZE);
}
-static ssize_t
-pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t n)
-{
- return -EINVAL;
-}
-
-power_attr(pm_trace_dev_match);
+power_attr_ro(pm_trace_dev_match);
#endif /* CONFIG_PM_TRACE */
diff --git a/kernel/power/power.h b/kernel/power/power.h
index caadb566e82b..efe1b3b17c88 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -77,6 +77,15 @@ static struct kobj_attribute _name##_attr = { \
.store = _name##_store, \
}
+#define power_attr_ro(_name) \
+static struct kobj_attribute _name##_attr = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = S_IRUGO, \
+ }, \
+ .show = _name##_show, \
+}
+
/* Preferred image size in bytes (default 500 MB) */
extern unsigned long image_size;
/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3a970604308f..f155c62f1f2c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -765,9 +765,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
*/
static bool rtree_next_node(struct memory_bitmap *bm)
{
- bm->cur.node = list_entry(bm->cur.node->list.next,
- struct rtree_node, list);
- if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+ if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+ bm->cur.node = list_entry(bm->cur.node->list.next,
+ struct rtree_node, list);
bm->cur.node_pfn += BM_BITS_PER_BLOCK;
bm->cur.node_bit = 0;
touch_softlockup_watchdog();
@@ -775,9 +775,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
}
/* No more nodes, goto next zone */
- bm->cur.zone = list_entry(bm->cur.zone->list.next,
+ if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+ bm->cur.zone = list_entry(bm->cur.zone->list.next,
struct mem_zone_bm_rtree, list);
- if (&bm->cur.zone->list != &bm->zones) {
bm->cur.node = list_entry(bm->cur.zone->leaves.next,
struct rtree_node, list);
bm->cur.node_pfn = 0;
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 084452e34a12..bdff5ed57f10 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -203,8 +203,10 @@ static int __init test_suspend(void)
/* RTCs have initialized by now too ... can we use one? */
dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm);
- if (dev)
+ if (dev) {
rtc = rtc_class_open(dev_name(dev));
+ put_device(dev);
+ }
if (!rtc) {
printk(warn_no_rtc);
return 0;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 12cd989dadf6..160e1006640d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -37,6 +37,14 @@
#define HIBERNATE_SIG "S1SUSPEND"
/*
+ * When reading an {un,}compressed image, we may restore pages in place,
+ * in which case some architectures need these pages cleaning before they
+ * can be executed. We don't know which pages these may be, so clean the lot.
+ */
+static bool clean_pages_on_read;
+static bool clean_pages_on_decompress;
+
+/*
* The swap map is a data structure used for keeping track of each page
* written to a swap partition. It consists of many swap_map_page
* structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
@@ -241,6 +249,9 @@ static void hib_end_io(struct bio *bio)
if (bio_data_dir(bio) == WRITE)
put_page(page);
+ else if (clean_pages_on_read)
+ flush_icache_range((unsigned long)page_address(page),
+ (unsigned long)page_address(page) + PAGE_SIZE);
if (bio->bi_error && !hb->error)
hb->error = bio->bi_error;
@@ -1049,6 +1060,7 @@ static int load_image(struct swap_map_handle *handle,
hib_init_batch(&hb);
+ clean_pages_on_read = true;
printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
nr_to_read);
m = nr_to_read / 10;
@@ -1124,6 +1136,10 @@ static int lzo_decompress_threadfn(void *data)
d->unc_len = LZO_UNC_SIZE;
d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
d->unc, &d->unc_len);
+ if (clean_pages_on_decompress)
+ flush_icache_range((unsigned long)d->unc,
+ (unsigned long)d->unc + d->unc_len);
+
atomic_set(&d->stop, 1);
wake_up(&d->done);
}
@@ -1189,6 +1205,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
memset(crc, 0, offsetof(struct crc_data, go));
+ clean_pages_on_decompress = true;
+
/*
* Start the decompression threads.
*/
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 276762f3a460..d5760c42f042 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -9,10 +9,10 @@
char *_braille_console_setup(char **str, char **brl_options)
{
- if (!memcmp(*str, "brl,", 4)) {
+ if (!strncmp(*str, "brl,", 4)) {
*brl_options = "";
*str += 4;
- } else if (!memcmp(str, "brl=", 4)) {
+ } else if (!strncmp(*str, "brl=", 4)) {
*brl_options = *str + 4;
*str = strchr(*brl_options, ',');
if (!*str)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 3189e51db7e8..a46c40bfb5f6 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
+ rcu_read_lock();
+ child->ptracer_cred = get_cred(__task_cred(new_parent));
+ rcu_read_unlock();
}
/**
@@ -71,11 +74,15 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
*/
void __ptrace_unlink(struct task_struct *child)
{
+ const struct cred *old_cred;
BUG_ON(!child->ptrace);
child->ptrace = 0;
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
+ old_cred = child->ptracer_cred;
+ child->ptracer_cred = NULL;
+ put_cred(old_cred);
spin_lock(&child->sighand->siglock);
@@ -219,7 +226,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
- int dumpable = 0;
+ struct mm_struct *mm;
kuid_t caller_uid;
kgid_t caller_gid;
@@ -270,16 +277,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
- smp_rmb();
- if (task->mm)
- dumpable = get_dumpable(task->mm);
- rcu_read_lock();
- if (dumpable != SUID_DUMP_USER &&
- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
- rcu_read_unlock();
- return -EPERM;
- }
- rcu_read_unlock();
+ mm = task->mm;
+ if (mm &&
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptrace_has_cap(mm->user_ns, mode)))
+ return -EPERM;
return security_ptrace_access_check(task, mode);
}
@@ -343,10 +345,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (seize)
flags |= PT_SEIZED;
- rcu_read_lock();
- if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
- flags |= PT_PTRACE_CAP;
- rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f07343b54fe5..2cb46d51d715 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -57,6 +57,8 @@
#include <linux/trace_events.h>
#include <linux/suspend.h>
+#include <soc/qcom/watchdog.h>
+
#include "tree.h"
#include "rcu.h"
@@ -1298,6 +1300,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
rcu_check_gp_kthread_starvation(rsp);
+#ifdef CONFIG_RCU_STALL_WATCHDOG_BITE
+ /* Induce watchdog bite */
+ msm_trigger_wdog_bite();
+#endif
+
force_quiescent_state(rsp); /* Kick them all. */
}
@@ -1333,6 +1340,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+#ifdef CONFIG_RCU_STALL_WATCHDOG_BITE
+ /* Induce non secure watchdog bite to collect context */
+ msm_trigger_wdog_bite();
+#endif
+
/*
* Attempt to revive the RCU machinery by forcing a context switch.
*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 630c19772630..32cbe72bf545 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2275,6 +2275,7 @@ static int rcu_nocb_kthread(void *arg)
cl++;
c++;
local_bh_enable();
+ cond_resched_rcu_qs();
list = next;
}
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3fcadbae663d..312ffdad034a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2044,6 +2044,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
success = 1; /* we're going to change ->state */
+ /*
+ * Ensure we load p->on_rq _after_ p->state, otherwise it would
+ * be possible to, falsely, observe p->on_rq == 0 and get stuck
+ * in smp_cond_load_acquire() below.
+ *
+ * sched_ttwu_pending() try_to_wake_up()
+ * [S] p->on_rq = 1; [L] p->state
+ * UNLOCK rq->lock -----.
+ * \
+ * +--- RMB
+ * schedule() /
+ * LOCK rq->lock -----'
+ * UNLOCK rq->lock
+ *
+ * [task p]
+ * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq
+ *
+ * Pairs with the UNLOCK+LOCK on rq->lock from the
+ * last wakeup of our task and the schedule that got our task
+ * current.
+ */
+ smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
goto stat;
@@ -2321,6 +2343,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
__dl_clear_params(p);
INIT_LIST_HEAD(&p->rt.run_list);
+ p->rt.timeout = 0;
+ p->rt.time_slice = sched_rr_timeslice;
+ p->rt.on_rq = 0;
+ p->rt.on_list = 0;
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -3735,7 +3761,7 @@ EXPORT_SYMBOL(default_wake_function);
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
- int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
+ int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
struct rq *rq;
const struct sched_class *prev_class;
@@ -3763,11 +3789,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
trace_sched_pi_setprio(p, prio);
oldprio = p->prio;
+
+ if (oldprio == prio)
+ queue_flag &= ~DEQUEUE_MOVE;
+
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
running = task_current(rq, p);
if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE);
+ dequeue_task(rq, p, queue_flag);
if (running)
put_prev_task(rq, p);
@@ -3785,7 +3815,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (!dl_prio(p->normal_prio) ||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
- enqueue_flag |= ENQUEUE_REPLENISH;
+ queue_flag |= ENQUEUE_REPLENISH;
} else
p->dl.dl_boosted = 0;
p->sched_class = &dl_sched_class;
@@ -3793,7 +3823,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (dl_prio(oldprio))
p->dl.dl_boosted = 0;
if (oldprio < prio)
- enqueue_flag |= ENQUEUE_HEAD;
+ queue_flag |= ENQUEUE_HEAD;
p->sched_class = &rt_sched_class;
} else {
if (dl_prio(oldprio))
@@ -3808,7 +3838,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
if (queued)
- enqueue_task(rq, p, enqueue_flag);
+ enqueue_task(rq, p, queue_flag);
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
@@ -4164,6 +4194,7 @@ static int __sched_setscheduler(struct task_struct *p,
const struct sched_class *prev_class;
struct rq *rq;
int reset_on_fork;
+ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
/* may grab non-irq protected spin_locks */
BUG_ON(in_interrupt());
@@ -4346,17 +4377,14 @@ change:
* itself.
*/
new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
- if (new_effective_prio == oldprio) {
- __setscheduler_params(p, attr);
- task_rq_unlock(rq, p, &flags);
- return 0;
- }
+ if (new_effective_prio == oldprio)
+ queue_flags &= ~DEQUEUE_MOVE;
}
queued = task_on_rq_queued(p);
running = task_current(rq, p);
if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE);
+ dequeue_task(rq, p, queue_flags);
if (running)
put_prev_task(rq, p);
@@ -4366,15 +4394,14 @@ change:
if (running)
p->sched_class->set_curr_task(rq);
if (queued) {
- int enqueue_flags = ENQUEUE_RESTORE;
/*
* We enqueue to tail when the priority of a task is
* increased (user space view).
*/
- if (oldprio <= p->prio)
- enqueue_flags |= ENQUEUE_HEAD;
+ if (oldprio < p->prio)
+ queue_flags |= ENQUEUE_HEAD;
- enqueue_task(rq, p, enqueue_flags);
+ enqueue_task(rq, p, queue_flags);
}
check_class_changed(rq, p, prev_class, oldprio);
@@ -8707,7 +8734,7 @@ void sched_move_task(struct task_struct *tsk)
queued = task_on_rq_queued(tsk);
if (queued)
- dequeue_task(rq, tsk, DEQUEUE_SAVE);
+ dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
if (unlikely(running))
put_prev_task(rq, tsk);
@@ -8731,7 +8758,7 @@ void sched_move_task(struct task_struct *tsk)
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
if (queued)
- enqueue_task(rq, tsk, ENQUEUE_RESTORE);
+ enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
task_rq_unlock(rq, tsk, &flags);
}
@@ -9523,7 +9550,6 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
- .allow_attach = subsys_cgroup_allow_attach,
.legacy_cftypes = cpu_files,
.early_init = 1,
};
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 1e3accddd103..983159cc0646 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -10,6 +10,8 @@
* GNU General Public License for more details.
*/
+#define pr_fmt(fmt) "core_ctl: " fmt
+
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
@@ -50,7 +52,6 @@ struct cluster_data {
};
struct cpu_data {
- bool online;
bool is_busy;
unsigned int busy;
unsigned int cpu;
@@ -242,22 +243,6 @@ static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
}
-static ssize_t show_cpus(const struct cluster_data *state, char *buf)
-{
- struct cpu_data *c;
- ssize_t count = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&state_lock, flags);
- list_for_each_entry(c, &state->lru, sib) {
- count += snprintf(buf + count, PAGE_SIZE - count,
- "CPU%u (%s)\n", c->cpu,
- c->online ? "Online" : "Offline");
- }
- spin_unlock_irqrestore(&state_lock, flags);
- return count;
-}
-
static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
@@ -286,10 +271,11 @@ static ssize_t show_global_state(const struct cluster_data *state, char *buf)
count += snprintf(buf + count, PAGE_SIZE - count,
"\tCPU: %u\n", c->cpu);
count += snprintf(buf + count, PAGE_SIZE - count,
- "\tOnline: %u\n", c->online);
+ "\tOnline: %u\n",
+ cpu_online(c->cpu));
count += snprintf(buf + count, PAGE_SIZE - count,
- "\tActive: %u\n",
- !cpu_isolated(c->cpu));
+ "\tIsolated: %u\n",
+ cpu_isolated(c->cpu));
count += snprintf(buf + count, PAGE_SIZE - count,
"\tFirst CPU: %u\n",
cluster->first_cpu);
@@ -376,7 +362,6 @@ core_ctl_attr_rw(busy_up_thres);
core_ctl_attr_rw(busy_down_thres);
core_ctl_attr_rw(task_thres);
core_ctl_attr_rw(is_big_cluster);
-core_ctl_attr_ro(cpus);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
@@ -390,7 +375,6 @@ static struct attribute *default_attrs[] = {
&busy_down_thres.attr,
&task_thres.attr,
&is_big_cluster.attr,
- &cpus.attr,
&need_cpus.attr,
&active_cpus.attr,
&global_state.attr,
@@ -534,7 +518,7 @@ static unsigned int get_active_cpu_count(const struct cluster_data *cluster)
static bool is_active(const struct cpu_data *state)
{
- return state->online && !cpu_isolated(state->cpu);
+ return cpu_online(state->cpu) && !cpu_isolated(state->cpu);
}
static bool adjustment_possible(const struct cluster_data *cluster,
@@ -815,7 +799,7 @@ static void __try_to_unisolate(struct cluster_data *cluster,
if (!c->isolated_by_us)
continue;
- if ((c->online && !cpu_isolated(c->cpu)) ||
+ if ((cpu_online(c->cpu) && !cpu_isolated(c->cpu)) ||
(!force && c->not_preferred))
continue;
if (cluster->active_cpus == need)
@@ -904,19 +888,7 @@ static int __ref cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
-
- /* If online state of CPU somehow got out of sync, fix it. */
- if (state->online) {
- state->online = false;
- cluster->active_cpus = get_active_cpu_count(cluster);
- pr_warn("CPU%d offline when state is online\n", cpu);
- }
- break;
-
case CPU_ONLINE:
-
- state->online = true;
cluster->active_cpus = get_active_cpu_count(cluster);
/*
@@ -941,15 +913,6 @@ static int __ref cpu_callback(struct notifier_block *nfb,
/* Move a CPU to the end of the LRU when it goes offline. */
move_cpu_lru(state);
- /* Fall through */
-
- case CPU_UP_CANCELED:
-
- /* If online state of CPU somehow got out of sync, fix it. */
- if (!state->online)
- pr_warn("CPU%d online when state is offline\n", cpu);
-
- state->online = false;
state->busy = 0;
cluster->active_cpus = get_active_cpu_count(cluster);
break;
@@ -1028,8 +991,6 @@ static int cluster_init(const struct cpumask *mask)
state = &per_cpu(cpu_state, cpu);
state->cluster = cluster;
state->cpu = cpu;
- if (cpu_online(cpu))
- state->online = true;
list_add_tail(&state->sib, &cluster->lru);
}
cluster->active_cpus = get_active_cpu_count(cluster);
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
index f6f9b9b3a4a8..d751bc2d0d6e 100644
--- a/kernel/sched/cpufreq_sched.c
+++ b/kernel/sched/cpufreq_sched.c
@@ -289,7 +289,7 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
pr_debug("%s: throttle threshold = %u [ns]\n",
__func__, gd->up_throttle_nsec);
- rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr());
+ rc = sysfs_create_group(&policy->kobj, get_sysfs_attr());
if (rc) {
pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
goto err;
@@ -332,7 +332,7 @@ static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
put_task_struct(gd->task);
}
- sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());
+ sysfs_remove_group(&policy->kobj, get_sysfs_attr());
policy->governor_data = NULL;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 00bbd91d6767..8d5353906c8d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3844,6 +3844,10 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif
+ /* Trace CPU load, unless cfs_rq belongs to a non-root task_group */
+ if (cfs_rq == &rq_of(cfs_rq)->cfs)
+ trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
+
return decayed || removed;
}
@@ -3867,7 +3871,6 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
if (entity_is_task(se))
trace_sched_load_avg_task(task_of(se), &se->avg);
- trace_sched_load_avg_cpu(cpu, cfs_rq);
}
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -6845,17 +6848,19 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre
if (new_util < cur_capacity) {
if (cpu_rq(i)->nr_running) {
- if(prefer_idle) {
- // Find a target cpu with lowest
- // utilization.
+ if (prefer_idle) {
+ /* Find a target cpu with highest
+ * utilization.
+ */
if (target_util == 0 ||
target_util < new_util) {
target_cpu = i;
target_util = new_util;
}
} else {
- // Find a target cpu with highest
- // utilization.
+ /* Find a target cpu with lowest
+ * utilization.
+ */
if (target_util == 0 ||
target_util > new_util) {
target_cpu = i;
@@ -8382,7 +8387,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
mcc->cpu = cpu;
#ifdef CONFIG_SCHED_DEBUG
raw_spin_unlock_irqrestore(&mcc->lock, flags);
- pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+ printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
+ cpu, capacity);
goto skip_unlock;
#endif
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7cc74e56fde4..c30c48fde7e6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -73,4 +73,8 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
* Energy aware scheduling. Use platform energy model to guide scheduling
* decisions optimizing for energy efficiency.
*/
+#ifdef CONFIG_DEFAULT_USE_ENERGY_AWARE
+SCHED_FEAT(ENERGY_AWARE, true)
+#else
SCHED_FEAT(ENERGY_AWARE, false)
+#endif
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 744c60dfb4fb..df47c26ab6d2 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -3274,7 +3274,9 @@ exit_early:
trace_sched_get_busy(cpu, busy[i].prev_load,
busy[i].new_task_load,
busy[i].predicted_load,
- early_detection[i]);
+ early_detection[i],
+ aggregate_load &&
+ cpu == max_busy_cpu);
i++;
}
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b72352bbd752..07b2c63e4983 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -441,7 +441,7 @@ static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
- return !list_empty(&rt_se->run_list);
+ return rt_se->on_rq;
}
#ifdef CONFIG_RT_GROUP_SCHED
@@ -487,8 +487,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
return rt_se->my_q;
}
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
@@ -504,7 +504,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
if (!rt_se)
enqueue_top_rt_rq(rt_rq);
else if (!on_rt_rq(rt_se))
- enqueue_rt_entity(rt_se, false);
+ enqueue_rt_entity(rt_se, 0);
if (rt_rq->highest_prio.curr < curr->prio)
resched_curr(rq);
@@ -521,7 +521,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
if (!rt_se)
dequeue_top_rt_rq(rt_rq);
else if (on_rt_rq(rt_se))
- dequeue_rt_entity(rt_se);
+ dequeue_rt_entity(rt_se, 0);
}
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1257,7 +1257,30 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
dec_rt_group(rt_se, rt_rq);
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+/*
+ * Change rt_se->run_list location unless SAVE && !MOVE
+ *
+ * assumes ENQUEUE/DEQUEUE flags match
+ */
+static inline bool move_entity(unsigned int flags)
+{
+ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+ return false;
+
+ return true;
+}
+
+static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+{
+ list_del_init(&rt_se->run_list);
+
+ if (list_empty(array->queue + rt_se_prio(rt_se)))
+ __clear_bit(rt_se_prio(rt_se), array->bitmap);
+
+ rt_se->on_list = 0;
+}
+
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1270,26 +1293,37 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
* get throttled and the current group doesn't have any other
* active members.
*/
- if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
+ if (rt_se->on_list)
+ __delist_rt_entity(rt_se, array);
return;
+ }
- if (head)
- list_add(&rt_se->run_list, queue);
- else
- list_add_tail(&rt_se->run_list, queue);
- __set_bit(rt_se_prio(rt_se), array->bitmap);
+ if (move_entity(flags)) {
+ WARN_ON_ONCE(rt_se->on_list);
+ if (flags & ENQUEUE_HEAD)
+ list_add(&rt_se->run_list, queue);
+ else
+ list_add_tail(&rt_se->run_list, queue);
+
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+ rt_se->on_list = 1;
+ }
+ rt_se->on_rq = 1;
inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
- list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (move_entity(flags)) {
+ WARN_ON_ONCE(!rt_se->on_list);
+ __delist_rt_entity(rt_se, array);
+ }
+ rt_se->on_rq = 0;
dec_rt_tasks(rt_se, rt_rq);
}
@@ -1298,7 +1332,7 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
* Because the prio of an upper entry depends on the lower
* entries, we must remove entries top - down.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
@@ -1311,31 +1345,31 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se);
+ __dequeue_rt_entity(rt_se, flags);
}
}
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
- dequeue_rt_stack(rt_se);
+ dequeue_rt_stack(rt_se, flags);
for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, head);
+ __enqueue_rt_entity(rt_se, flags);
enqueue_top_rt_rq(&rq->rt);
}
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
- dequeue_rt_stack(rt_se);
+ dequeue_rt_stack(rt_se, flags);
for_each_sched_rt_entity(rt_se) {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, false);
+ __enqueue_rt_entity(rt_se, flags);
}
enqueue_top_rt_rq(&rq->rt);
}
@@ -1351,7 +1385,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
- enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
+ enqueue_rt_entity(rt_se, flags);
inc_hmp_sched_stats_rt(rq, p);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1363,7 +1397,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
struct sched_rt_entity *rt_se = &p->rt;
update_curr_rt(rq);
- dequeue_rt_entity(rt_se);
+ dequeue_rt_entity(rt_se, flags);
dec_hmp_sched_stats_rt(rq, p);
dequeue_pushable_task(rq, p);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 360e298398fb..75500042fd32 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1941,19 +1941,41 @@ static const u32 prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
+/*
+ * {de,en}queue flags:
+ *
+ * DEQUEUE_SLEEP - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
+ * are in a known state which allows modification. Such pairs
+ * should preserve as much state as possible.
+ *
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ * in the runqueue.
+ *
+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_WAKING - sched_class::task_waking was called
+ *
+ */
+
+#define DEQUEUE_SLEEP 0x01
+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
+#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
+
#define ENQUEUE_WAKEUP 0x01
-#define ENQUEUE_HEAD 0x02
+#define ENQUEUE_RESTORE 0x02
+#define ENQUEUE_MOVE 0x04
+
+#define ENQUEUE_HEAD 0x08
+#define ENQUEUE_REPLENISH 0x10
#ifdef CONFIG_SMP
-#define ENQUEUE_WAKING 0x04 /* sched_class::task_waking was called */
+#define ENQUEUE_WAKING 0x20
#else
#define ENQUEUE_WAKING 0x00
#endif
-#define ENQUEUE_REPLENISH 0x08
-#define ENQUEUE_RESTORE 0x10
-#define ENQUEUE_WAKEUP_NEW 0x20
-
-#define DEQUEUE_SLEEP 0x01
-#define DEQUEUE_SAVE 0x02
+#define ENQUEUE_WAKEUP_NEW 0x40
#define RETRY_TASK ((void *)-1UL)
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index b2ff383d6062..b0c5fe6d1f3b 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -896,7 +896,6 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
.cancel_attach = schedtune_cancel_attach,
.legacy_cftypes = files,
.early_init = 1,
- .allow_attach = subsys_cgroup_allow_attach,
.attach = schedtune_attach,
};
@@ -910,6 +909,7 @@ schedtune_init_cgroups(void)
for_each_possible_cpu(cpu) {
bg = &per_cpu(cpu_boost_groups, cpu);
memset(bg, 0, sizeof(struct boost_groups));
+ raw_spin_lock_init(&bg->lock);
}
pr_info("schedtune: configured to support %d boost groups\n",
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 07b7f84b37e2..6e053bd9830c 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -22,7 +22,6 @@
#include <linux/syscore_ops.h>
#include <linux/cpufreq.h>
#include <trace/events/sched.h>
-#include <clocksource/arm_arch_timer.h>
#include "sched.h"
#include "walt.h"
@@ -63,8 +62,6 @@ static unsigned int max_possible_freq = 1;
*/
static unsigned int min_max_freq = 1;
-static unsigned int max_capacity = 1024;
-static unsigned int min_capacity = 1024;
static unsigned int max_load_scale_factor = 1024;
static unsigned int max_possible_capacity = 1024;
@@ -188,10 +185,8 @@ update_window_start(struct rq *rq, u64 wallclock)
delta = wallclock - rq->window_start;
/* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */
if (delta < 0) {
- if (arch_timer_read_counter() == 0)
- delta = 0;
- else
- BUG_ON(1);
+ delta = 0;
+ WARN_ONCE(1, "WALT wallclock appears to have gone backwards or reset\n");
}
if (delta < walt_ravg_window)
@@ -872,39 +867,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
double_rq_unlock(src_rq, dest_rq);
}
-/* Keep track of max/min capacity possible across CPUs "currently" */
-static void __update_min_max_capacity(void)
-{
- int i;
- int max = 0, min = INT_MAX;
-
- for_each_online_cpu(i) {
- if (cpu_rq(i)->capacity > max)
- max = cpu_rq(i)->capacity;
- if (cpu_rq(i)->capacity < min)
- min = cpu_rq(i)->capacity;
- }
-
- max_capacity = max;
- min_capacity = min;
-}
-
-static void update_min_max_capacity(void)
-{
- unsigned long flags;
- int i;
-
- local_irq_save(flags);
- for_each_possible_cpu(i)
- raw_spin_lock(&cpu_rq(i)->lock);
-
- __update_min_max_capacity();
-
- for_each_possible_cpu(i)
- raw_spin_unlock(&cpu_rq(i)->lock);
- local_irq_restore(flags);
-}
-
/*
* Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
* least efficient cpu gets capacity of 1024
@@ -987,15 +949,9 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
/* Initialized to policy->max in case policy->related_cpus is empty! */
unsigned int orig_max_freq = policy->max;
- if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY &&
- val != CPUFREQ_CREATE_POLICY)
+ if (val != CPUFREQ_NOTIFY)
return 0;
- if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) {
- update_min_max_capacity();
- return 0;
- }
-
for_each_cpu(i, policy->related_cpus) {
cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
policy->related_cpus);
@@ -1085,8 +1041,6 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
max_load_scale_factor = highest_mplsf;
}
- __update_min_max_capacity();
-
return 0;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ffa85996313c..8cc5167e4b04 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2371,6 +2371,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
return 0;
}
+static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, int *valp,
+				  int write, void *data)
+{
+	if (write) {
+		/* Reject negatives and values that overflow unsigned int */
+		if (*negp || *lvalp > UINT_MAX)
+			return -EINVAL;
+		*valp = *lvalp;
+	} else {
+		/* Reinterpret the stored bits as unsigned before widening */
+		*lvalp = (unsigned int)*valp;
+	}
+	return 0;
+}
+
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2498,8 +2513,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table,write,buffer,lenp,ppos,
- NULL,NULL);
+ return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
+}
+
+/**
+ * proc_douintvec - read a vector of unsigned integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_conv, NULL);
}
/*
@@ -2700,6 +2734,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
break;
if (neg)
continue;
+ val = convmul * val / convdiv;
if ((min && val < *min) || (max && val > *max))
continue;
*i = val;
@@ -3112,6 +3147,12 @@ int proc_dointvec(struct ctl_table *table, int write,
return -ENOSYS;
}
+int proc_douintvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -3157,6 +3198,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
* exception granted :-)
*/
EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0cdc34ebd8d1..2af5687b83c9 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -412,12 +412,10 @@ static int alarmtimer_suspend(struct device *dev)
now = rtc_tm_to_ktime(tm);
now = ktime_add(now, min);
if (poweron_alarm) {
- struct rtc_time tm_val;
- unsigned long secs;
+ uint64_t msec = 0;
- tm_val = rtc_ktime_to_tm(min);
- rtc_tm_to_time(&tm_val, &secs);
- lpm_suspend_wake_time(secs);
+ msec = ktime_to_ms(min);
+ lpm_suspend_wake_time(msec);
} else {
/* Set alarm, if in the past reject suspend briefly to handle */
ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b98810d2f3b4..89cc82a38e4d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -108,7 +108,7 @@ static int finished_booting;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
-static void clocksource_select(void);
+static void clocksource_select(bool force);
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
@@ -415,7 +415,7 @@ static int clocksource_watchdog_kthread(void *data)
{
mutex_lock(&clocksource_mutex);
if (__clocksource_watchdog_kthread())
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -555,11 +555,12 @@ static inline void clocksource_update_max_deferment(struct clocksource *cs)
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
-static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
+static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur,
+ bool force)
{
struct clocksource *cs;
- if (!finished_booting || list_empty(&clocksource_list))
+ if ((!finished_booting && !force) || list_empty(&clocksource_list))
return NULL;
/*
@@ -577,13 +578,13 @@ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
return NULL;
}
-static void __clocksource_select(bool skipcur)
+static void __clocksource_select(bool skipcur, bool force)
{
bool oneshot = tick_oneshot_mode_active();
struct clocksource *best, *cs;
/* Find the best suitable clocksource */
- best = clocksource_find_best(oneshot, skipcur);
+ best = clocksource_find_best(oneshot, skipcur, force);
if (!best)
return;
@@ -623,22 +624,40 @@ static void __clocksource_select(bool skipcur)
* Select the clocksource with the best rating, or the clocksource,
* which is selected by userspace override.
*/
-static void clocksource_select(void)
+static void clocksource_select(bool force)
{
- __clocksource_select(false);
+ return __clocksource_select(false, force);
}
static void clocksource_select_fallback(void)
{
- __clocksource_select(true);
+ __clocksource_select(true, false);
}
#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
-static inline void clocksource_select(void) { }
+
+static inline void clocksource_select(bool force) { }
static inline void clocksource_select_fallback(void) { }
#endif
+/**
+ * clocksource_select_force - Force re-selection of the best clocksource
+ * among registered clocksources
+ *
+ * clocksource_select() can't select the best clocksource before
+ * clocksource_done_booting() has been called, and it must be called
+ * with clocksource_mutex held. Provide a new API that can be called
+ * from other files to select the best clocksource irrespective of the
+ * finished_booting flag.
+ */
+void clocksource_select_force(void)
+{
+ mutex_lock(&clocksource_mutex);
+ clocksource_select(true);
+ mutex_unlock(&clocksource_mutex);
+}
+
/*
* clocksource_done_booting - Called near the end of core bootup
*
@@ -655,7 +674,7 @@ static int __init clocksource_done_booting(void)
* Run the watchdog first to eliminate unstable clock sources
*/
__clocksource_watchdog_kthread();
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -744,6 +763,7 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
+
/**
* __clocksource_register_scale - Used to install new clocksources
* @cs: clocksource to be registered
@@ -765,7 +785,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
- clocksource_select();
+ clocksource_select(false);
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
return 0;
@@ -788,7 +808,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
{
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
- clocksource_select();
+ clocksource_select(false);
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
}
@@ -892,7 +912,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
ret = sysfs_get_uname(buf, override_name, count);
if (ret >= 0)
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f6aae7977824..d2a20e83ebae 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -871,6 +871,9 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
int cpu = smp_processor_id();
+ if (!bc)
+ return;
+
/* Set it up only once ! */
if (bc->event_handler != tick_handle_oneshot_broadcast) {
int was_periodic = clockevent_state_periodic(bc);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4ff237dbc006..5fa544f3f560 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -298,17 +298,34 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+ cycle_t delta)
+{
+ u64 nsec;
+
+ nsec = delta * tkr->mult + tkr->xtime_nsec;
+ nsec >>= tkr->shift;
+
+ /* If arch requires, add in arch_gettimeoffset() */
+ return nsec + arch_gettimeoffset();
+}
+
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t delta;
- s64 nsec;
delta = timekeeping_get_delta(tkr);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
- nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
+static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+ cycle_t cycles)
+{
+ cycle_t delta;
- /* If arch requires, add in get_arch_timeoffset() */
- return nsec + arch_gettimeoffset();
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
}
/**
@@ -385,8 +402,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += clocksource_delta(tkr->read(tkr->clock),
- tkr->cycle_last, tkr->mask);
+ now += timekeeping_delta_to_ns(tkr,
+ clocksource_delta(
+ tkr->read(tkr->clock),
+ tkr->cycle_last,
+ tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
return now;
@@ -404,6 +424,35 @@ u64 ktime_get_raw_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) It's possible that a timestamp is taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping, making the clock appear to update slightly
+ * earlier:
+ * CPU 0 CPU 1
+ * timekeeping_inject_sleeptime64()
+ * __timekeeping_inject_sleeptime(tk, delta);
+ * timestamp();
+ * timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated. Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2acad4b6a92a..2963266fb7bf 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,4 +1,8 @@
+# We are fully aware of the dangers of __builtin_return_address()
+FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
+KBUILD_CFLAGS += $(FRAME_CFLAGS)
+
# Do not instrument the tracer itself:
ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 069aa1aa82b6..66d9e907aa07 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -895,6 +895,7 @@ static struct {
{ trace_clock, "perf", 1 },
{ ktime_get_mono_fast_ns, "mono", 1 },
{ ktime_get_raw_fast_ns, "mono_raw", 1 },
+ { ktime_get_boot_fast_ns, "boot", 1 },
ARCH_TRACE_CLOCKS
};
@@ -3990,6 +3991,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
kfree(s->saved_cmdlines);
kfree(s->map_cmdline_to_pid);
+ kfree(s->saved_tgids);
kfree(s);
}
@@ -4832,19 +4834,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
struct trace_iterator *iter = filp->private_data;
ssize_t sret;
- /* return any leftover data */
- sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
- if (sret != -EBUSY)
- return sret;
-
- trace_seq_init(&iter->seq);
-
/*
* Avoid more than one consumer on a single file descriptor
* This is just a matter of traces coherency, the ring buffer itself
* is protected.
*/
mutex_lock(&iter->mutex);
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ goto out;
+
+ trace_seq_init(&iter->seq);
+
if (iter->trace->read) {
sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
if (sret)
@@ -5874,9 +5877,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
return -EBUSY;
#endif
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
if (*ppos & (PAGE_SIZE - 1))
return -EINVAL;
@@ -5886,6 +5886,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
len &= PAGE_MASK;
}
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
again:
trace_access_lock(iter->cpu_file);
entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@ -5943,19 +5946,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */
if (!spd.nr_pages) {
if (ret)
- return ret;
+ goto out;
+ ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
- return -EAGAIN;
+ goto out;
ret = wait_on_pipe(iter, true);
if (ret)
- return ret;
+ goto out;
goto again;
}
ret = splice_to_pipe(pipe, &spd);
+out:
splice_shrink_spd(&spd);
return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4641bdb40f8f..96c75b0e9831 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -785,6 +785,10 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
+ /* If a graph tracer ignored set_graph_notrace */
+ if (call->depth < -1)
+ call->depth += FTRACE_NOTRACE_DEPTH;
+
/*
* Comments display at + 1 to depth. Since
* this is a leaf function, keep the comments
@@ -793,7 +797,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data->depth = call->depth - 1;
/* No need to keep this function around for this depth */
- if (call->depth < FTRACE_RETFUNC_DEPTH)
+ if (call->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(call->depth < 0))
cpu_data->enter_funcs[call->depth] = 0;
}
@@ -823,11 +828,16 @@ print_graph_entry_nested(struct trace_iterator *iter,
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
+ /* If a graph tracer ignored set_graph_notrace */
+ if (call->depth < -1)
+ call->depth += FTRACE_NOTRACE_DEPTH;
+
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
cpu_data->depth = call->depth;
/* Save this function pointer to see if the exit matches */
- if (call->depth < FTRACE_RETFUNC_DEPTH)
+ if (call->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(call->depth < 0))
cpu_data->enter_funcs[call->depth] = call->func;
}
@@ -1057,7 +1067,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
*/
cpu_data->depth = trace->depth - 1;
- if (trace->depth < FTRACE_RETFUNC_DEPTH) {
+ if (trace->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(trace->depth < 0)) {
if (cpu_data->enter_funcs[trace->depth] != trace->func)
func_match = 0;
cpu_data->enter_funcs[trace->depth] = 0;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f2813e137b23..1de2ef8ec926 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,6 +27,7 @@
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>
+#include <soc/qcom/watchdog.h>
/*
* The run state of the lockup detectors is controlled by the content of the
@@ -366,8 +367,11 @@ static void watchdog_check_hardlockup_other_cpu(void)
if (per_cpu(hard_watchdog_warn, next_cpu) == true)
return;
- if (hardlockup_panic)
- panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+ if (hardlockup_panic) {
+ pr_err("Watchdog detected hard LOCKUP on cpu %u",
+ next_cpu);
+ msm_trigger_wdog_bite();
+ }
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
@@ -423,13 +427,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
*/
if (is_hardlockup()) {
int this_cpu = smp_processor_id();
- struct pt_regs *regs = get_irq_regs();
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ if (hardlockup_panic)
+ msm_trigger_wdog_bite();
+
print_modules();
print_irqtrace_events(current);
if (regs)
@@ -552,6 +558,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
+
+ if (softlockup_panic)
+ msm_trigger_wdog_bite();
__this_cpu_write(softlockup_task_ptr_saved, current);
print_modules();
print_irqtrace_events(current);