summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Johannes Berg <johannes.berg@intel.com> 2015-06-10 12:44:58 +0200
committer Johannes Berg <johannes.berg@intel.com> 2015-06-10 12:45:09 +0200
commit 206c59d1d7d42bcafc1d7f1e476e87e4427e2345 (patch)
tree a2f99470bd0fe43f5cf57812fca969bb3ca3c451 /kernel
parent 5ec596c41bba6f4e3eeef5dc089afc8eaa702a7e (diff)
parent c3eee1fb1d308564ada5f7ea57bc51efc6130b37 (diff)
Merge remote-tracking branch 'net-next/master' into mac80211-next
Merge back net-next to get wireless driver changes (from Kalle) to be able to create the API change across all trees properly.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 6
-rw-r--r-- kernel/audit.c 49
-rw-r--r-- kernel/audit.h 3
-rw-r--r-- kernel/audit_tree.c 92
-rw-r--r-- kernel/audit_watch.c 14
-rw-r--r-- kernel/auditsc.c 15
-rw-r--r-- kernel/bpf/arraymap.c 113
-rw-r--r-- kernel/bpf/core.c 110
-rw-r--r-- kernel/bpf/helpers.c 47
-rw-r--r-- kernel/bpf/syscall.c 42
-rw-r--r-- kernel/bpf/verifier.c 54
-rw-r--r-- kernel/compat.c 6
-rw-r--r-- kernel/events/core.c 44
-rw-r--r-- kernel/events/ring_buffer.c 14
-rw-r--r-- kernel/irq/dummychip.c 1
-rw-r--r-- kernel/kexec.c 2
-rw-r--r-- kernel/locking/lockdep.c 16
-rw-r--r-- kernel/locking/rtmutex.c 12
-rw-r--r-- kernel/module.c 12
-rw-r--r-- kernel/params.c 4
-rw-r--r-- kernel/printk/printk.c 53
-rw-r--r-- kernel/rcu/tree.c 16
-rw-r--r-- kernel/relay.c 4
-rw-r--r-- kernel/sched/core.c 74
-rw-r--r-- kernel/sched/idle.c 16
-rw-r--r-- kernel/seccomp.c 70
-rw-r--r-- kernel/smp.c 80
-rw-r--r-- kernel/time/clockevents.c 8
-rw-r--r-- kernel/time/hrtimer.c 14
-rw-r--r-- kernel/trace/bpf_trace.c 14
-rw-r--r-- kernel/trace/trace.c 2
-rw-r--r-- kernel/trace/trace_events.c 19
-rw-r--r-- kernel/trace/trace_functions_graph.c 8
-rw-r--r-- kernel/trace/trace_output.c 3
-rw-r--r-- kernel/trace/trace_uprobe.c 2
-rw-r--r-- kernel/watchdog.c 20
36 files changed, 689 insertions, 370 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0f8f8b0bc1bf..60c302cfb4d3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -197,9 +197,9 @@ x509.genkey:
@echo >>x509.genkey "x509_extensions = myexts"
@echo >>x509.genkey
@echo >>x509.genkey "[ req_distinguished_name ]"
- @echo >>x509.genkey "O = Magrathea"
- @echo >>x509.genkey "CN = Glacier signing key"
- @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2"
+ @echo >>x509.genkey "#O = Unspecified company"
+ @echo >>x509.genkey "CN = Build time autogenerated kernel key"
+ @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company"
@echo >>x509.genkey
@echo >>x509.genkey "[ myexts ]"
@echo >>x509.genkey "basicConstraints=critical,CA:FALSE"
diff --git a/kernel/audit.c b/kernel/audit.c
index 72ab759a0b43..1c13e4267de6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -43,6 +43,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/file.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/atomic.h>
@@ -107,6 +108,7 @@ static u32 audit_rate_limit;
* When set to zero, this means unlimited. */
static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
+static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_overflow = 0;
@@ -338,13 +340,13 @@ static int audit_set_backlog_limit(u32 limit)
static int audit_set_backlog_wait_time(u32 timeout)
{
return audit_do_config_change("audit_backlog_wait_time",
- &audit_backlog_wait_time, timeout);
+ &audit_backlog_wait_time_master, timeout);
}
static int audit_set_enabled(u32 state)
{
int rc;
- if (state < AUDIT_OFF || state > AUDIT_LOCKED)
+ if (state > AUDIT_LOCKED)
return -EINVAL;
rc = audit_do_config_change("audit_enabled", &audit_enabled, state);
@@ -663,7 +665,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
case AUDIT_MAKE_EQUIV:
/* Only support auditd and auditctl in initial pid namespace
* for now. */
- if ((task_active_pid_ns(current) != &init_pid_ns))
+ if (task_active_pid_ns(current) != &init_pid_ns)
return -EPERM;
if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
@@ -834,7 +836,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.lost = atomic_read(&audit_lost);
s.backlog = skb_queue_len(&audit_skb_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
- s.backlog_wait_time = audit_backlog_wait_time;
+ s.backlog_wait_time = audit_backlog_wait_time_master;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
@@ -877,8 +879,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) {
if (sizeof(s) > (size_t)nlh->nlmsg_len)
return -EINVAL;
- if (s.backlog_wait_time < 0 ||
- s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
+ if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
return -EINVAL;
err = audit_set_backlog_wait_time(s.backlog_wait_time);
if (err < 0)
@@ -1385,7 +1386,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
return NULL;
}
- audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
+ if (!reserve)
+ audit_backlog_wait_time = audit_backlog_wait_time_master;
ab = audit_buffer_alloc(ctx, gfp_mask, type);
if (!ab) {
@@ -1759,7 +1761,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
} else
audit_log_format(ab, " name=(null)");
- if (n->ino != (unsigned long)-1) {
+ if (n->ino != (unsigned long)-1)
audit_log_format(ab, " inode=%lu"
" dev=%02x:%02x mode=%#ho"
" ouid=%u ogid=%u rdev=%02x:%02x",
@@ -1771,7 +1773,6 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
from_kgid(&init_user_ns, n->gid),
MAJOR(n->rdev),
MINOR(n->rdev));
- }
if (n->osid != 0) {
char *ctx = NULL;
u32 len;
@@ -1838,11 +1839,29 @@ error_path:
}
EXPORT_SYMBOL(audit_log_task_context);
+void audit_log_d_path_exe(struct audit_buffer *ab,
+ struct mm_struct *mm)
+{
+ struct file *exe_file;
+
+ if (!mm)
+ goto out_null;
+
+ exe_file = get_mm_exe_file(mm);
+ if (!exe_file)
+ goto out_null;
+
+ audit_log_d_path(ab, " exe=", &exe_file->f_path);
+ fput(exe_file);
+ return;
+out_null:
+ audit_log_format(ab, " exe=(null)");
+}
+
void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
{
const struct cred *cred;
char comm[sizeof(tsk->comm)];
- struct mm_struct *mm = tsk->mm;
char *tty;
if (!ab)
@@ -1878,13 +1897,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
audit_log_format(ab, " comm=");
audit_log_untrustedstring(ab, get_task_comm(comm, tsk));
- if (mm) {
- down_read(&mm->mmap_sem);
- if (mm->exe_file)
- audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
- up_read(&mm->mmap_sem);
- } else
- audit_log_format(ab, " exe=(null)");
+ audit_log_d_path_exe(ab, tsk->mm);
audit_log_task_context(ab);
}
EXPORT_SYMBOL(audit_log_task_info);
@@ -1915,7 +1928,7 @@ void audit_log_link_denied(const char *operation, struct path *link)
/* Generate AUDIT_PATH record with object. */
name->type = AUDIT_TYPE_NORMAL;
- audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+ audit_copy_inode(name, link->dentry, d_backing_inode(link->dentry));
audit_log_name(current->audit_context, name, link, 0, NULL);
out:
kfree(name);
diff --git a/kernel/audit.h b/kernel/audit.h
index 1caa0d345d90..d641f9bb3ed0 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -257,6 +257,9 @@ extern struct list_head audit_filter_list[];
extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
+extern void audit_log_d_path_exe(struct audit_buffer *ab,
+ struct mm_struct *mm);
+
/* audit watch functions */
#ifdef CONFIG_AUDIT_WATCH
extern void audit_put_watch(struct audit_watch *watch);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2e0c97427b33..b0f9877273fc 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
/*
* One struct chunk is attached to each inode of interest.
@@ -576,7 +577,7 @@ int audit_remove_tree_rule(struct audit_krule *rule)
static int compare_root(struct vfsmount *mnt, void *arg)
{
- return mnt->mnt_root->d_inode == arg;
+ return d_backing_inode(mnt->mnt_root) == arg;
}
void audit_trim_trees(void)
@@ -648,7 +649,58 @@ void audit_put_tree(struct audit_tree *tree)
static int tag_mount(struct vfsmount *mnt, void *arg)
{
- return tag_chunk(mnt->mnt_root->d_inode, arg);
+ return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
+}
+
+/*
+ * That gets run when evict_chunk() ends up needing to kill audit_tree.
+ * Runs from a separate thread.
+ */
+static int prune_tree_thread(void *unused)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty(&prune_list))
+ schedule();
+ __set_current_state(TASK_RUNNING);
+
+ mutex_lock(&audit_cmd_mutex);
+ mutex_lock(&audit_filter_mutex);
+
+ while (!list_empty(&prune_list)) {
+ struct audit_tree *victim;
+
+ victim = list_entry(prune_list.next,
+ struct audit_tree, list);
+ list_del_init(&victim->list);
+
+ mutex_unlock(&audit_filter_mutex);
+
+ prune_one(victim);
+
+ mutex_lock(&audit_filter_mutex);
+ }
+
+ mutex_unlock(&audit_filter_mutex);
+ mutex_unlock(&audit_cmd_mutex);
+ }
+ return 0;
+}
+
+static int audit_launch_prune(void)
+{
+ if (prune_thread)
+ return 0;
+ prune_thread = kthread_create(prune_tree_thread, NULL,
+ "audit_prune_tree");
+ if (IS_ERR(prune_thread)) {
+ pr_err("cannot start thread audit_prune_tree");
+ prune_thread = NULL;
+ return -ENOMEM;
+ } else {
+ wake_up_process(prune_thread);
+ return 0;
+ }
}
/* called with audit_filter_mutex */
@@ -674,6 +726,12 @@ int audit_add_tree_rule(struct audit_krule *rule)
/* do not set rule->tree yet */
mutex_unlock(&audit_filter_mutex);
+ if (unlikely(!prune_thread)) {
+ err = audit_launch_prune();
+ if (err)
+ goto Err;
+ }
+
err = kern_path(tree->pathname, 0, &path);
if (err)
goto Err;
@@ -811,36 +869,10 @@ int audit_tag_tree(char *old, char *new)
return failed;
}
-/*
- * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread.
- */
-static int prune_tree_thread(void *unused)
-{
- mutex_lock(&audit_cmd_mutex);
- mutex_lock(&audit_filter_mutex);
-
- while (!list_empty(&prune_list)) {
- struct audit_tree *victim;
-
- victim = list_entry(prune_list.next, struct audit_tree, list);
- list_del_init(&victim->list);
-
- mutex_unlock(&audit_filter_mutex);
-
- prune_one(victim);
-
- mutex_lock(&audit_filter_mutex);
- }
-
- mutex_unlock(&audit_filter_mutex);
- mutex_unlock(&audit_cmd_mutex);
- return 0;
-}
static void audit_schedule_prune(void)
{
- kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+ wake_up_process(prune_thread);
}
/*
@@ -907,9 +939,9 @@ static void evict_chunk(struct audit_chunk *chunk)
for (n = 0; n < chunk->count; n++)
list_del_init(&chunk->owners[n].list);
spin_unlock(&hash_lock);
+ mutex_unlock(&audit_filter_mutex);
if (need_prune)
audit_schedule_prune();
- mutex_unlock(&audit_filter_mutex);
}
static int audit_tree_handle_event(struct fsnotify_group *group,
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index ad9c1682f616..6e30024d9aac 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -146,7 +146,7 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
/* Initialize a parent watch entry. */
static struct audit_parent *audit_init_parent(struct path *path)
{
- struct inode *inode = path->dentry->d_inode;
+ struct inode *inode = d_backing_inode(path->dentry);
struct audit_parent *parent;
int ret;
@@ -361,11 +361,11 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- mutex_unlock(&parent->dentry->d_inode->i_mutex);
- if (d->d_inode) {
+ mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+ if (d_is_positive(d)) {
/* update watch filter fields */
- watch->dev = d->d_inode->i_sb->s_dev;
- watch->ino = d->d_inode->i_ino;
+ watch->dev = d_backing_inode(d)->i_sb->s_dev;
+ watch->ino = d_backing_inode(d)->i_ino;
}
dput(d);
return 0;
@@ -426,7 +426,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
return ret;
/* either find an old parent or attach a new one */
- parent = audit_find_parent(parent_path.dentry->d_inode);
+ parent = audit_find_parent(d_backing_inode(parent_path.dentry));
if (!parent) {
parent = audit_init_parent(&parent_path);
if (IS_ERR(parent)) {
@@ -482,7 +482,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
switch (data_type) {
case (FSNOTIFY_EVENT_PATH):
- inode = ((struct path *)data)->dentry->d_inode;
+ inode = d_backing_inode(((struct path *)data)->dentry);
break;
case (FSNOTIFY_EVENT_INODE):
inode = (struct inode *)data;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index dc4ae70a7413..9fb9d1cb83ce 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1629,7 +1629,7 @@ retry:
rcu_read_lock();
seq = read_seqbegin(&rename_lock);
for(;;) {
- struct inode *inode = d->d_inode;
+ struct inode *inode = d_backing_inode(d);
if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) {
struct audit_chunk *chunk;
chunk = audit_tree_lookup(inode);
@@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
unsigned int flags)
{
struct audit_context *context = current->audit_context;
- const struct inode *inode = dentry->d_inode;
+ const struct inode *inode = d_backing_inode(dentry);
struct audit_names *n;
bool parent = flags & AUDIT_INODE_PARENT;
@@ -1853,7 +1853,7 @@ void __audit_inode_child(const struct inode *parent,
const unsigned char type)
{
struct audit_context *context = current->audit_context;
- const struct inode *inode = dentry->d_inode;
+ const struct inode *inode = d_backing_inode(dentry);
const char *dname = dentry->d_name.name;
struct audit_names *n, *found_parent = NULL, *found_child = NULL;
@@ -2361,7 +2361,6 @@ static void audit_log_task(struct audit_buffer *ab)
kuid_t auid, uid;
kgid_t gid;
unsigned int sessionid;
- struct mm_struct *mm = current->mm;
char comm[sizeof(current->comm)];
auid = audit_get_loginuid(current);
@@ -2376,13 +2375,7 @@ static void audit_log_task(struct audit_buffer *ab)
audit_log_task_context(ab);
audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
audit_log_untrustedstring(ab, get_task_comm(comm, current));
- if (mm) {
- down_read(&mm->mmap_sem);
- if (mm->exe_file)
- audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
- up_read(&mm->mmap_sem);
- } else
- audit_log_format(ab, " exe=(null)");
+ audit_log_d_path_exe(ab, current->mm);
}
/**
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8a6616583f38..cb31229a6fa4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -14,12 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
-
-struct bpf_array {
- struct bpf_map map;
- u32 elem_size;
- char value[0] __aligned(8);
-};
+#include <linux/filter.h>
/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
@@ -154,3 +149,109 @@ static int __init register_array_map(void)
return 0;
}
late_initcall(register_array_map);
+
+static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
+{
+ /* only bpf_prog file descriptors can be stored in prog_array map */
+ if (attr->value_size != sizeof(u32))
+ return ERR_PTR(-EINVAL);
+ return array_map_alloc(attr);
+}
+
+static void prog_array_map_free(struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+
+ synchronize_rcu();
+
+ /* make sure it's empty */
+ for (i = 0; i < array->map.max_entries; i++)
+ BUG_ON(array->prog[i] != NULL);
+ kvfree(array);
+}
+
+static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return NULL;
+}
+
+/* only called from syscall */
+static int prog_array_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_prog *prog, *old_prog;
+ u32 index = *(u32 *)key, ufd;
+
+ if (map_flags != BPF_ANY)
+ return -EINVAL;
+
+ if (index >= array->map.max_entries)
+ return -E2BIG;
+
+ ufd = *(u32 *)value;
+ prog = bpf_prog_get(ufd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (!bpf_prog_array_compatible(array, prog)) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
+ old_prog = xchg(array->prog + index, prog);
+ if (old_prog)
+ bpf_prog_put_rcu(old_prog);
+
+ return 0;
+}
+
+static int prog_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_prog *old_prog;
+ u32 index = *(u32 *)key;
+
+ if (index >= array->map.max_entries)
+ return -E2BIG;
+
+ old_prog = xchg(array->prog + index, NULL);
+ if (old_prog) {
+ bpf_prog_put_rcu(old_prog);
+ return 0;
+ } else {
+ return -ENOENT;
+ }
+}
+
+/* decrement refcnt of all bpf_progs that are stored in this map */
+void bpf_prog_array_map_clear(struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+
+ for (i = 0; i < array->map.max_entries; i++)
+ prog_array_map_delete_elem(map, &i);
+}
+
+static const struct bpf_map_ops prog_array_ops = {
+ .map_alloc = prog_array_map_alloc,
+ .map_free = prog_array_map_free,
+ .map_get_next_key = array_map_get_next_key,
+ .map_lookup_elem = prog_array_map_lookup_elem,
+ .map_update_elem = prog_array_map_update_elem,
+ .map_delete_elem = prog_array_map_delete_elem,
+};
+
+static struct bpf_map_type_list prog_array_type __read_mostly = {
+ .ops = &prog_array_ops,
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+};
+
+static int __init register_prog_array_map(void)
+{
+ bpf_register_map_type(&prog_array_type);
+ return 0;
+}
+late_initcall(register_prog_array_map);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4139a0f8b558..1e00aa3316dc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -26,9 +26,10 @@
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
-#include <asm/unaligned.h>
#include <linux/bpf.h>
+#include <asm/unaligned.h>
+
/* Registers */
#define BPF_R0 regs[BPF_REG_0]
#define BPF_R1 regs[BPF_REG_1]
@@ -62,6 +63,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
+
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
@@ -244,6 +246,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
@@ -286,6 +289,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
[BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
};
+ u32 tail_call_cnt = 0;
void *ptr;
int off;
@@ -357,8 +361,8 @@ select_insn:
ALU64_MOD_X:
if (unlikely(SRC == 0))
return 0;
- tmp = DST;
- DST = do_div(tmp, SRC);
+ div64_u64_rem(DST, SRC, &tmp);
+ DST = tmp;
CONT;
ALU_MOD_X:
if (unlikely(SRC == 0))
@@ -367,8 +371,8 @@ select_insn:
DST = do_div(tmp, (u32) SRC);
CONT;
ALU64_MOD_K:
- tmp = DST;
- DST = do_div(tmp, IMM);
+ div64_u64_rem(DST, IMM, &tmp);
+ DST = tmp;
CONT;
ALU_MOD_K:
tmp = (u32) DST;
@@ -377,7 +381,7 @@ select_insn:
ALU64_DIV_X:
if (unlikely(SRC == 0))
return 0;
- do_div(DST, SRC);
+ DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
if (unlikely(SRC == 0))
@@ -387,7 +391,7 @@ select_insn:
DST = (u32) tmp;
CONT;
ALU64_DIV_K:
- do_div(DST, IMM);
+ DST = div64_u64(DST, IMM);
CONT;
ALU_DIV_K:
tmp = (u32) DST;
@@ -431,6 +435,30 @@ select_insn:
BPF_R4, BPF_R5);
CONT;
+ JMP_TAIL_CALL: {
+ struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_prog *prog;
+ u64 index = BPF_R3;
+
+ if (unlikely(index >= array->map.max_entries))
+ goto out;
+
+ if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+ goto out;
+
+ tail_call_cnt++;
+
+ prog = READ_ONCE(array->prog[index]);
+ if (unlikely(!prog))
+ goto out;
+
+ ARG1 = BPF_R1;
+ insn = prog->insnsi;
+ goto select_insn;
+out:
+ CONT;
+ }
/* JMP */
JMP_JA:
insn += insn->off;
@@ -615,25 +643,63 @@ load_byte:
return 0;
}
-void __weak bpf_int_jit_compile(struct bpf_prog *prog)
+bool bpf_prog_array_compatible(struct bpf_array *array,
+ const struct bpf_prog *fp)
{
+ if (!array->owner_prog_type) {
+ /* There's no owner yet where we could check for
+ * compatibility.
+ */
+ array->owner_prog_type = fp->type;
+ array->owner_jited = fp->jited;
+
+ return true;
+ }
+
+ return array->owner_prog_type == fp->type &&
+ array->owner_jited == fp->jited;
+}
+
+static int bpf_check_tail_call(const struct bpf_prog *fp)
+{
+ struct bpf_prog_aux *aux = fp->aux;
+ int i;
+
+ for (i = 0; i < aux->used_map_cnt; i++) {
+ struct bpf_map *map = aux->used_maps[i];
+ struct bpf_array *array;
+
+ if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ continue;
+
+ array = container_of(map, struct bpf_array, map);
+ if (!bpf_prog_array_compatible(array, fp))
+ return -EINVAL;
+ }
+
+ return 0;
}
/**
- * bpf_prog_select_runtime - select execution runtime for BPF program
+ * bpf_prog_select_runtime - select exec runtime for BPF program
* @fp: bpf_prog populated with internal BPF program
*
- * try to JIT internal BPF program, if JIT is not available select interpreter
- * BPF program will be executed via BPF_PROG_RUN() macro
+ * Try to JIT eBPF program, if JIT is not available, use interpreter.
+ * The BPF program will be executed via BPF_PROG_RUN() macro.
*/
-void bpf_prog_select_runtime(struct bpf_prog *fp)
+int bpf_prog_select_runtime(struct bpf_prog *fp)
{
fp->bpf_func = (void *) __bpf_prog_run;
- /* Probe if internal BPF can be JITed */
bpf_int_jit_compile(fp);
- /* Lock whole bpf_prog as read-only */
bpf_prog_lock_ro(fp);
+
+ /* The tail call compatibility check can only be done at
+ * this late stage as we need to determine, if we deal
+ * with JITed or non JITed program concatenations and not
+ * all eBPF JITs might immediately support all features.
+ */
+ return bpf_check_tail_call(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
@@ -663,6 +729,22 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
+
+/* Always built-in helper functions. */
+const struct bpf_func_proto bpf_tail_call_proto = {
+ .func = NULL,
+ .gpl_only = false,
+ .ret_type = RET_VOID,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
+/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */
+void __weak bpf_int_jit_compile(struct bpf_prog *prog)
+{
+}
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bd7f5988ed9c..7ad5d8842d5b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -13,6 +13,7 @@
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
+#include <linux/ktime.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -44,11 +45,11 @@ static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
- .func = bpf_map_lookup_elem,
- .gpl_only = false,
- .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_MAP_KEY,
+ .func = bpf_map_lookup_elem,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_KEY,
};
static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
@@ -63,13 +64,13 @@ static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
const struct bpf_func_proto bpf_map_update_elem_proto = {
- .func = bpf_map_update_elem,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_MAP_KEY,
- .arg3_type = ARG_PTR_TO_MAP_VALUE,
- .arg4_type = ARG_ANYTHING,
+ .func = bpf_map_update_elem,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_KEY,
+ .arg3_type = ARG_PTR_TO_MAP_VALUE,
+ .arg4_type = ARG_ANYTHING,
};
static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
@@ -83,11 +84,11 @@ static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
const struct bpf_func_proto bpf_map_delete_elem_proto = {
- .func = bpf_map_delete_elem,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_MAP_KEY,
+ .func = bpf_map_delete_elem,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_KEY,
};
static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
@@ -111,3 +112,15 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
};
+
+static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ /* NMI safe access to clock monotonic */
+ return ktime_get_mono_fast_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_ns_proto = {
+ .func = bpf_ktime_get_ns,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3bae6c591914..a1b14d197a4f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
{
struct bpf_map *map = filp->private_data;
+ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+ /* prog_array stores refcnt-ed bpf_prog pointers
+ * release them all when user space closes prog_array_fd
+ */
+ bpf_prog_array_map_clear(map);
+
bpf_map_put(map);
return 0;
}
@@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
*/
BUG_ON(!prog->aux->ops->get_func_proto);
+ if (insn->imm == BPF_FUNC_tail_call) {
+ /* mark bpf_tail_call as different opcode
+ * to avoid conditional branch in
+ * interpreter for every normal call
+ * and to prevent accidental JITing by
+ * JIT compiler that doesn't support
+ * bpf_tail_call yet
+ */
+ insn->imm = 0;
+ insn->code |= BPF_X;
+ continue;
+ }
+
fn = prog->aux->ops->get_func_proto(insn->imm);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
@@ -413,6 +432,23 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+static void __prog_put_rcu(struct rcu_head *rcu)
+{
+ struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
+
+ free_used_maps(aux);
+ bpf_prog_free(aux->prog);
+}
+
+/* version of bpf_prog_put() that is called after a grace period */
+void bpf_prog_put_rcu(struct bpf_prog *prog)
+{
+ if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ prog->aux->prog = prog;
+ call_rcu(&prog->aux->rcu, __prog_put_rcu);
+ }
+}
+
void bpf_prog_put(struct bpf_prog *prog)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
@@ -426,7 +462,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
{
struct bpf_prog *prog = filp->private_data;
- bpf_prog_put(prog);
+ bpf_prog_put_rcu(prog);
return 0;
}
@@ -532,7 +568,9 @@ static int bpf_prog_load(union bpf_attr *attr)
fixup_bpf_calls(prog);
/* eBPF program is ready to be JITed */
- bpf_prog_select_runtime(prog);
+ err = bpf_prog_select_runtime(prog);
+ if (err < 0)
+ goto free_used_maps;
err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47dcd3aa6e23..039d866fd36a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id)
fn->ret_type, func_id);
return -EINVAL;
}
+
+ if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
+ func_id != BPF_FUNC_tail_call)
+ /* prog_array map type needs extra care:
+ * only allow to pass it into bpf_tail_call() for now.
+ * bpf_map_delete_elem() can be allowed in the future,
+ * while bpf_map_update_elem() must only be done via syscall
+ */
+ return -EINVAL;
+
+ if (func_id == BPF_FUNC_tail_call &&
+ map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ /* don't allow any other map type to be passed into
+ * bpf_tail_call()
+ */
+ return -EINVAL;
+
return 0;
}
@@ -1675,6 +1692,8 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
+ enum bpf_reg_type dst_reg_type;
+
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
if (err)
@@ -1683,11 +1702,6 @@ static int do_check(struct verifier_env *env)
continue;
}
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->imm != 0) {
- verbose("BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
/* check src1 operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
@@ -1697,6 +1711,8 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
+ dst_reg_type = regs[insn->dst_reg].type;
+
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
@@ -1704,6 +1720,15 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
+ if (insn->imm == 0) {
+ insn->imm = dst_reg_type;
+ } else if (dst_reg_type != insn->imm &&
+ (dst_reg_type == PTR_TO_CTX ||
+ insn->imm == PTR_TO_CTX)) {
+ verbose("same insn cannot be used with different pointers\n");
+ return -EINVAL;
+ }
+
} else if (class == BPF_ST) {
if (BPF_MODE(insn->code) != BPF_MEM ||
insn->src_reg != BPF_REG_0) {
@@ -1822,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
- (BPF_MODE(insn->code) != BPF_MEM ||
- insn->imm != 0)) {
+ (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
verbose("BPF_LDX uses reserved fields\n");
return -EINVAL;
}
+ if (BPF_CLASS(insn->code) == BPF_STX &&
+ ((BPF_MODE(insn->code) != BPF_MEM &&
+ BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
+ verbose("BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_map *map;
struct fd f;
@@ -1950,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env)
struct bpf_prog *new_prog;
u32 cnt;
int i;
+ enum bpf_access_type type;
if (!env->prog->aux->ops->convert_ctx_access)
return 0;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+ type = BPF_READ;
+ else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+ type = BPF_WRITE;
+ else
continue;
if (insn->imm != PTR_TO_CTX) {
@@ -1965,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env)
}
cnt = env->prog->aux->ops->
- convert_ctx_access(insn->dst_reg, insn->src_reg,
+ convert_ctx_access(type, insn->dst_reg, insn->src_reg,
insn->off, insn_buf);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
diff --git a/kernel/compat.c b/kernel/compat.c
index 24f00610c575..333d364be29d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -912,7 +912,8 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
* bitmap. We must however ensure the end of the
* kernel bitmap is zeroed.
*/
- if (nr_compat_longs-- > 0) {
+ if (nr_compat_longs) {
+ nr_compat_longs--;
if (__get_user(um, umask))
return -EFAULT;
} else {
@@ -954,7 +955,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
* We dont want to write past the end of the userspace
* bitmap.
*/
- if (nr_compat_longs-- > 0) {
+ if (nr_compat_longs) {
+ nr_compat_longs--;
if (__put_user(um, umask))
return -EFAULT;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4ece9f..eddf1ed4155e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
* Those places that change perf_event::ctx will hold both
* perf_event_ctx::mutex of the 'old' and 'new' ctx value.
*
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ * - perf_event_exit_task_context() [ child , 0 ]
+ * __perf_event_exit_task()
+ * sync_child_event()
+ * put_event() [ parent, 1 ]
+ *
+ * - perf_event_init_context() [ parent, 0 ]
+ * inherit_task_group()
+ * inherit_group()
+ * inherit_event()
+ * perf_event_alloc()
+ * perf_init_event()
+ * perf_try_init_event() [ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two -- this is in fact safe because
+ * life-time rules separate them. That is, an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
*
* The change in perf_event::ctx does not affect children (as claimed above)
* because the sys_perf_event_open() case will install a new event and break
@@ -3422,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head)
if (event->ns)
put_pid_ns(event->ns);
perf_event_free_filter(event);
- perf_event_free_bpf_prog(event);
kfree(event);
}
@@ -3553,6 +3572,8 @@ static void __free_event(struct perf_event *event)
put_callchain_buffers();
}
+ perf_event_free_bpf_prog(event);
+
if (event->destroy)
event->destroy(event);
@@ -3657,9 +3678,6 @@ static void perf_remove_from_owner(struct perf_event *event)
}
}
-/*
- * Called when the last reference to the file is gone.
- */
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx;
@@ -3697,6 +3715,9 @@ int perf_event_release_kernel(struct perf_event *event)
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+/*
+ * Called when the last reference to the file is gone.
+ */
static int perf_release(struct inode *inode, struct file *file)
{
put_event(file->private_data);
@@ -7364,7 +7385,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
return -ENODEV;
if (event->group_leader != event) {
- ctx = perf_event_ctx_lock(event->group_leader);
+ /*
+ * This ctx->mutex can nest when we're called through
+ * inheritance. See the perf_event_ctx_lock_nested() comment.
+ */
+ ctx = perf_event_ctx_lock_nested(event->group_leader,
+ SINGLE_DEPTH_NESTING);
BUG_ON(!ctx);
}
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 232f00f273cb..725c416085e3 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
}
+ /*
+ * In overwrite mode, PMUs that don't support SG may not handle more
+ * than one contiguous allocation, since they rely on PMI to do double
+ * buffering. In this case, the entire buffer has to be one contiguous
+ * chunk.
+ */
+ if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
+ overwrite) {
+ struct page *page = virt_to_page(rb->aux_pages[0]);
+
+ if (page_private(page) != max_order)
+ goto out;
+ }
+
rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
overwrite);
if (!rb->aux_priv)
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 988dc58e8847..2feb6feca0cc 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = {
.irq_ack = noop,
.irq_mask = noop,
.irq_unmask = noop,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
EXPORT_SYMBOL_GPL(dummy_irq_chip);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 38c25b1f2fd5..7a36fdcca5bf 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
do {
unsigned long pfn, epfn, addr, eaddr;
- pages = kimage_alloc_pages(GFP_KERNEL, order);
+ pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
if (!pages)
break;
pfn = page_to_pfn(pages);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ba77ab5f64dd..a0831e1b99f4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -551,7 +551,21 @@ static void print_lockdep_cache(struct lockdep_map *lock)
static void print_lock(struct held_lock *hlock)
{
- print_lock_name(hlock_class(hlock));
+ /*
+ * We can be called locklessly through debug_show_all_locks() so be
+ * extra careful, the hlock might have been released and cleared.
+ */
+ unsigned int class_idx = hlock->class_idx;
+
+ /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */
+ barrier();
+
+ if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) {
+ printk("<RELEASED>\n");
+ return;
+ }
+
+ print_lock_name(lock_classes + class_idx - 1);
printk(", at: ");
print_ip_sym(hlock->acquire_ip);
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b73279367087..b025295f4966 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
}
/*
- * Called by sched_setscheduler() to check whether the priority change
- * is overruled by a possible priority boosting.
+ * Called by sched_setscheduler() to get the priority which will be
+ * effective after the change.
*/
-int rt_mutex_check_prio(struct task_struct *task, int newprio)
+int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
if (!task_has_pi_waiters(task))
- return 0;
+ return newprio;
- return task_top_pi_waiter(task)->task->prio <= newprio;
+ if (task_top_pi_waiter(task)->task->prio <= newprio)
+ return task_top_pi_waiter(task)->task->prio;
+ return newprio;
}
/*
diff --git a/kernel/module.c b/kernel/module.c
index 650b038ae520..cfc9e843a924 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -387,9 +387,9 @@ static bool check_symbol(const struct symsearch *syms,
pr_warn("Symbol %s is marked as UNUSED, however this module is "
"using it.\n", fsa->name);
pr_warn("This symbol will go away in the future.\n");
- pr_warn("Please evalute if this is the right api to use and if "
- "it really is, submit a report the linux kernel "
- "mailinglist together with submitting your code for "
+ pr_warn("Please evaluate if this is the right api to use and "
+ "if it really is, submit a report to the linux kernel "
+ "mailing list together with submitting your code for "
"inclusion.\n");
}
#endif
@@ -2511,7 +2511,8 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
return err;
/* Suck in entire file: we'll want most of it. */
- info->hdr = vmalloc(info->len);
+ info->hdr = __vmalloc(info->len,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL);
if (!info->hdr)
return -ENOMEM;
@@ -3369,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
module_bug_cleanup(mod);
mutex_unlock(&module_mutex);
+ blocking_notifier_call_chain(&module_notify_list,
+ MODULE_STATE_GOING, mod);
+
/* we can't deallocate the module until we clear memory protection */
unset_module_init_ro_nx(mod);
unset_module_core_ro_nx(mod);
diff --git a/kernel/params.c b/kernel/params.c
index 728e05b167de..a22d6a759b1a 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -173,9 +173,9 @@ static char *next_arg(char *args, char **param, char **val)
if (args[i-1] == '"')
args[i-1] = '\0';
}
- if (quoted && args[i-1] == '"')
- args[i-1] = '\0';
}
+ if (quoted && args[i-1] == '"')
+ args[i-1] = '\0';
if (args[i]) {
args[i] = '\0';
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 879edfc5ee52..c099b082cd02 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2017,24 +2017,6 @@ int add_preferred_console(char *name, int idx, char *options)
return __add_preferred_console(name, idx, options, NULL);
}
-int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
-{
- struct console_cmdline *c;
- int i;
-
- for (i = 0, c = console_cmdline;
- i < MAX_CMDLINECONSOLES && c->name[0];
- i++, c++)
- if (strcmp(c->name, name) == 0 && c->index == idx) {
- strlcpy(c->name, name_new, sizeof(c->name));
- c->options = options;
- c->index = idx_new;
- return i;
- }
- /* not found */
- return -1;
-}
-
bool console_suspend_enabled = true;
EXPORT_SYMBOL(console_suspend_enabled);
@@ -2436,9 +2418,6 @@ void register_console(struct console *newcon)
if (preferred_console < 0 || bcon || !console_drivers)
preferred_console = selected_console;
- if (newcon->early_setup)
- newcon->early_setup();
-
/*
* See if we want to use this console driver. If we
* didn't select a console we take the first one
@@ -2464,23 +2443,27 @@ void register_console(struct console *newcon)
for (i = 0, c = console_cmdline;
i < MAX_CMDLINECONSOLES && c->name[0];
i++, c++) {
- BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
- if (strcmp(c->name, newcon->name) != 0)
- continue;
- if (newcon->index >= 0 &&
- newcon->index != c->index)
- continue;
- if (newcon->index < 0)
- newcon->index = c->index;
+ if (!newcon->match ||
+ newcon->match(newcon, c->name, c->index, c->options) != 0) {
+ /* default matching */
+ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
+ if (strcmp(c->name, newcon->name) != 0)
+ continue;
+ if (newcon->index >= 0 &&
+ newcon->index != c->index)
+ continue;
+ if (newcon->index < 0)
+ newcon->index = c->index;
- if (_braille_register_console(newcon, c))
- return;
+ if (_braille_register_console(newcon, c))
+ return;
+
+ if (newcon->setup &&
+ newcon->setup(newcon, c->options) != 0)
+ break;
+ }
- if (newcon->setup &&
- newcon->setup(newcon, console_cmdline[i].options) != 0)
- break;
newcon->flags |= CON_ENABLED;
- newcon->index = c->index;
if (i == selected_console) {
newcon->flags |= CON_CONSDEV;
preferred_console = selected_console;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 233165da782f..8cf7304b2867 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
module_param(kthread_prio, int, 0644);
-/* Delay in jiffies for grace-period initialization delays. */
-static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)
- ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
- : 0;
+/* Delay in jiffies for grace-period initialization delays, debug only. */
+#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
+static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
module_param(gp_init_delay, int, 0644);
+#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+static const int gp_init_delay;
+#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */
/*
* Track the rcutorture test sequence number and the update version
@@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
raw_spin_unlock_irq(&rnp->lock);
cond_resched_rcu_qs();
ACCESS_ONCE(rsp->gp_activity) = jiffies;
- if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) &&
- gp_init_delay > 0 &&
- !(rsp->gpnum % (rcu_num_nodes * 10)))
+ if (gp_init_delay > 0 &&
+ !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD)))
schedule_timeout_uninterruptible(gp_init_delay);
}
diff --git a/kernel/relay.c b/kernel/relay.c
index 5a56d3c8dc03..e9dbaeb8fd65 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf,
struct dentry *dentry)
{
buf->dentry = dentry;
- buf->dentry->d_inode->i_size = buf->early_bytes;
+ d_inode(buf->dentry)->i_size = buf->early_bytes;
}
static struct dentry *relay_create_buf_file(struct rchan *chan,
@@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
buf->padding[old_subbuf] = buf->prev_padding;
buf->subbufs_produced++;
if (buf->dentry)
- buf->dentry->d_inode->i_size +=
+ d_inode(buf->dentry)->i_size +=
buf->chan->subbuf_size -
buf->padding[old_subbuf];
else
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f9123a82cbb6..123673291ffb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq_clock_skip_update(rq, true);
}
-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
- struct task_migration_notifier tmn;
-
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
-
- tmn.task = p;
- tmn.from_cpu = task_cpu(p);
- tmn.to_cpu = new_cpu;
-
- atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
@@ -3315,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p,
/* Actually do priority change: must hold pi & rq lock. */
static void __setscheduler(struct rq *rq, struct task_struct *p,
- const struct sched_attr *attr)
+ const struct sched_attr *attr, bool keep_boost)
{
__setscheduler_params(p, attr);
/*
- * If we get here, there was no pi waiters boosting the
- * task. It is safe to use the normal prio.
+ * Keep a potential priority boosting if called from
+ * sched_setscheduler().
*/
- p->prio = normal_prio(p);
+ if (keep_boost)
+ p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+ else
+ p->prio = normal_prio(p);
if (dl_prio(p->prio))
p->sched_class = &dl_sched_class;
@@ -3423,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p,
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
int retval, oldprio, oldpolicy = -1, queued, running;
- int policy = attr->sched_policy;
+ int new_effective_prio, policy = attr->sched_policy;
unsigned long flags;
const struct sched_class *prev_class;
struct rq *rq;
@@ -3605,15 +3593,14 @@ change:
oldprio = p->prio;
/*
- * Special case for priority boosted tasks.
- *
- * If the new priority is lower or equal (user space view)
- * than the current (boosted) priority, we just store the new
+ * Take priority boosted tasks into account. If the new
+ * effective priority is unchanged, we just store the new
* normal parameters and do not touch the scheduler class and
* the runqueue. This will be done when the task deboost
* itself.
*/
- if (rt_mutex_check_prio(p, newprio)) {
+ new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio) {
__setscheduler_params(p, attr);
task_rq_unlock(rq, p, &flags);
return 0;
@@ -3627,7 +3614,7 @@ change:
put_prev_task(rq, p);
prev_class = p->sched_class;
- __setscheduler(rq, p, attr);
+ __setscheduler(rq, p, attr, true);
if (running)
p->sched_class->set_curr_task(rq);
@@ -4402,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout)
long ret;
current->in_iowait = 1;
- if (old_iowait)
- blk_schedule_flush_plug(current);
- else
- blk_flush_plug(current);
+ blk_schedule_flush_plug(current);
delayacct_blkio_start();
rq = raw_rq();
@@ -7012,27 +6996,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
unsigned long flags;
long cpu = (long)hcpu;
struct dl_bw *dl_b;
+ bool overflow;
+ int cpus;
- switch (action & ~CPU_TASKS_FROZEN) {
+ switch (action) {
case CPU_DOWN_PREPARE:
- /* explicitly allow suspend */
- if (!(action & CPU_TASKS_FROZEN)) {
- bool overflow;
- int cpus;
-
- rcu_read_lock_sched();
- dl_b = dl_bw_of(cpu);
+ rcu_read_lock_sched();
+ dl_b = dl_bw_of(cpu);
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, 0);
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+ raw_spin_lock_irqsave(&dl_b->lock, flags);
+ cpus = dl_bw_cpus(cpu);
+ overflow = __dl_overflow(dl_b, cpus, 0, 0);
+ raw_spin_unlock_irqrestore(&dl_b->lock, flags);
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched();
- if (overflow)
- return notifier_from_errno(-EBUSY);
- }
+ if (overflow)
+ return notifier_from_errno(-EBUSY);
cpuset_update_active_cpus(false);
break;
case CPU_DOWN_PREPARE_FROZEN:
@@ -7361,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
queued = task_on_rq_queued(p);
if (queued)
dequeue_task(rq, p, 0);
- __setscheduler(rq, p, &attr);
+ __setscheduler(rq, p, &attr, false);
if (queued) {
enqueue_task(rq, p, 0);
resched_curr(rq);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index deef1caa94c6..fefcb1fa5160 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -81,7 +81,6 @@ static void cpuidle_idle_call(void)
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int next_state, entered_state;
- unsigned int broadcast;
bool reflect;
/*
@@ -150,17 +149,6 @@ static void cpuidle_idle_call(void)
goto exit_idle;
}
- broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP;
-
- /*
- * Tell the time framework to switch to a broadcast timer
- * because our local timer will be shutdown. If a local timer
- * is used from another cpu as a broadcast timer, this call may
- * fail if it is not available
- */
- if (broadcast && tick_broadcast_enter())
- goto use_default;
-
/* Take note of the planned idle state. */
idle_set_state(this_rq(), &drv->states[next_state]);
@@ -174,8 +162,8 @@ static void cpuidle_idle_call(void)
/* The cpu is no longer idle or about to enter idle. */
idle_set_state(this_rq(), NULL);
- if (broadcast)
- tick_broadcast_exit();
+ if (entered_state == -EBUSY)
+ goto use_default;
/*
* Give the governor an opportunity to reflect on the outcome
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 4f44028943e6..245df6b32b81 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -346,16 +346,13 @@ static inline void seccomp_sync_threads(void)
*/
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
- struct seccomp_filter *filter;
- unsigned long fp_size;
- struct sock_filter *fp;
- int new_len;
- long ret;
+ struct seccomp_filter *sfilter;
+ int ret;
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
return ERR_PTR(-EINVAL);
+
BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
- fp_size = fprog->len * sizeof(struct sock_filter);
/*
* Installing a seccomp filter requires that the task has
@@ -368,60 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
CAP_SYS_ADMIN) != 0)
return ERR_PTR(-EACCES);
- fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
- if (!fp)
- return ERR_PTR(-ENOMEM);
-
- /* Copy the instructions from fprog. */
- ret = -EFAULT;
- if (copy_from_user(fp, fprog->filter, fp_size))
- goto free_prog;
-
- /* Check and rewrite the fprog via the skb checker */
- ret = bpf_check_classic(fp, fprog->len);
- if (ret)
- goto free_prog;
-
- /* Check and rewrite the fprog for seccomp use */
- ret = seccomp_check_filter(fp, fprog->len);
- if (ret)
- goto free_prog;
-
- /* Convert 'sock_filter' insns to 'bpf_insn' insns */
- ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len);
- if (ret)
- goto free_prog;
-
/* Allocate a new seccomp_filter */
- ret = -ENOMEM;
- filter = kzalloc(sizeof(struct seccomp_filter),
- GFP_KERNEL|__GFP_NOWARN);
- if (!filter)
- goto free_prog;
-
- filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
- if (!filter->prog)
- goto free_filter;
-
- ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
- if (ret)
- goto free_filter_prog;
-
- kfree(fp);
- atomic_set(&filter->usage, 1);
- filter->prog->len = new_len;
+ sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
+ if (!sfilter)
+ return ERR_PTR(-ENOMEM);
- bpf_prog_select_runtime(filter->prog);
+ ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
+ seccomp_check_filter);
+ if (ret < 0) {
+ kfree(sfilter);
+ return ERR_PTR(ret);
+ }
- return filter;
+ atomic_set(&sfilter->usage, 1);
-free_filter_prog:
- __bpf_prog_free(filter->prog);
-free_filter:
- kfree(filter);
-free_prog:
- kfree(fp);
- return ERR_PTR(ret);
+ return sfilter;
}
/**
diff --git a/kernel/smp.c b/kernel/smp.c
index f38a1e692259..07854477c164 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -19,7 +19,7 @@
enum {
CSD_FLAG_LOCK = 0x01,
- CSD_FLAG_WAIT = 0x02,
+ CSD_FLAG_SYNCHRONOUS = 0x02,
};
struct call_function_data {
@@ -107,7 +107,7 @@ void __init call_function_init(void)
*/
static void csd_lock_wait(struct call_single_data *csd)
{
- while (csd->flags & CSD_FLAG_LOCK)
+ while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
cpu_relax();
}
@@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd)
* to ->flags with any subsequent assignments to other
* fields of the specified call_single_data structure:
*/
- smp_mb();
+ smp_wmb();
}
static void csd_unlock(struct call_single_data *csd)
{
- WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK));
+ WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
/*
* ensure we're all done before releasing data:
*/
- smp_mb();
-
- csd->flags &= ~CSD_FLAG_LOCK;
+ smp_store_release(&csd->flags, 0);
}
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
@@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
* ->func, ->info, and ->flags set.
*/
static int generic_exec_single(int cpu, struct call_single_data *csd,
- smp_call_func_t func, void *info, int wait)
+ smp_call_func_t func, void *info)
{
- struct call_single_data csd_stack = { .flags = 0 };
- unsigned long flags;
-
-
if (cpu == smp_processor_id()) {
+ unsigned long flags;
+
+ /*
+ * We can unlock early even for the synchronous on-stack case,
+ * since we're doing this from the same CPU..
+ */
+ csd_unlock(csd);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
@@ -158,24 +159,14 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
}
- if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu))
+ if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+ csd_unlock(csd);
return -ENXIO;
-
-
- if (!csd) {
- csd = &csd_stack;
- if (!wait)
- csd = this_cpu_ptr(&csd_data);
}
- csd_lock(csd);
-
csd->func = func;
csd->info = info;
- if (wait)
- csd->flags |= CSD_FLAG_WAIT;
-
/*
* The list addition should be visible before sending the IPI
* handler locks the list to pull the entry off it because of
@@ -190,9 +181,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
arch_send_call_function_single_ipi(cpu);
- if (wait)
- csd_lock_wait(csd);
-
return 0;
}
@@ -250,8 +238,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
}
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
- csd->func(csd->info);
- csd_unlock(csd);
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ /* Do we wait until *after* callback? */
+ if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+ func(info);
+ csd_unlock(csd);
+ } else {
+ csd_unlock(csd);
+ func(info);
+ }
}
/*
@@ -274,6 +271,8 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
int wait)
{
+ struct call_single_data *csd;
+ struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
int this_cpu;
int err;
@@ -292,7 +291,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
- err = generic_exec_single(cpu, NULL, func, info, wait);
+ csd = &csd_stack;
+ if (!wait) {
+ csd = this_cpu_ptr(&csd_data);
+ csd_lock(csd);
+ }
+
+ err = generic_exec_single(cpu, csd, func, info);
+
+ if (wait)
+ csd_lock_wait(csd);
put_cpu();
@@ -321,7 +329,15 @@ int smp_call_function_single_async(int cpu, struct call_single_data *csd)
int err = 0;
preempt_disable();
- err = generic_exec_single(cpu, csd, csd->func, csd->info, 0);
+
+ /* We could deadlock if we have to wait here with interrupts disabled! */
+ if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
+ csd_lock_wait(csd);
+
+ csd->flags = CSD_FLAG_LOCK;
+ smp_wmb();
+
+ err = generic_exec_single(cpu, csd, csd->func, csd->info);
preempt_enable();
return err;
@@ -433,6 +449,8 @@ void smp_call_function_many(const struct cpumask *mask,
struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock(csd);
+ if (wait)
+ csd->flags |= CSD_FLAG_SYNCHRONOUS;
csd->func = func;
csd->info = info;
llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 25d942d1da27..637a09461c1d 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev,
/* Transition with new state-specific callbacks */
switch (state) {
case CLOCK_EVT_STATE_DETACHED:
- /*
- * This is an internal state, which is guaranteed to go from
- * SHUTDOWN to DETACHED. No driver interaction required.
- */
- return 0;
+ /* The clockevent device is getting replaced. Shut it down. */
case CLOCK_EVT_STATE_SHUTDOWN:
return dev->set_state_shutdown(dev);
@@ -440,7 +436,7 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
mutex_unlock(&clockevents_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(clockevents_unbind);
+EXPORT_SYMBOL_GPL(clockevents_unbind_device);
/* Sanity check of state transition callbacks */
static int clockevents_sanity_check(struct clock_event_device *dev)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 76d4bd962b19..93ef7190bdea 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
/*
* Divide a ktime value by a nanosecond value
*/
-u64 __ktime_divns(const ktime_t kt, s64 div)
+s64 __ktime_divns(const ktime_t kt, s64 div)
{
- u64 dclc;
int sft = 0;
+ s64 dclc;
+ u64 tmp;
dclc = ktime_to_ns(kt);
+ tmp = dclc < 0 ? -dclc : dclc;
+
/* Make sure the divisor is less than 2^32: */
while (div >> 32) {
sft++;
div >>= 1;
}
- dclc >>= sft;
- do_div(dclc, (unsigned long) div);
-
- return dclc;
+ tmp >>= sft;
+ do_div(tmp, (unsigned long) div);
+ return dclc < 0 ? -tmp : tmp;
}
EXPORT_SYMBOL_GPL(__ktime_divns);
#endif /* BITS_PER_LONG >= 64 */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d56ce501632..50c4015a8ad3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -79,18 +79,6 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
-{
- /* NMI safe access to clock monotonic */
- return ktime_get_mono_fast_ns();
-}
-
-static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
- .func = bpf_ktime_get_ns,
- .gpl_only = true,
- .ret_type = RET_INTEGER,
-};
-
/*
* limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
@@ -172,6 +160,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_tail_call:
+ return &bpf_tail_call_proto;
case BPF_FUNC_trace_printk:
/*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 91eecaaa43e0..05330494a0df 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6079,7 +6079,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
if (ret) /* See tracing_get_cpu() */
- ret->d_inode->i_cdev = (void *)(cpu + 1);
+ d_inode(ret)->i_cdev = (void *)(cpu + 1);
return ret;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7da1dfeb322e..c4de47fc5cca 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
if (dir) {
spin_lock(&dir->d_lock); /* probably unneeded */
list_for_each_entry(child, &dir->d_subdirs, d_child) {
- if (child->d_inode) /* probably unneeded */
- child->d_inode->i_private = NULL;
+ if (d_really_is_positive(child)) /* probably unneeded */
+ d_inode(child)->i_private = NULL;
}
spin_unlock(&dir->d_lock);
@@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
char *event = NULL, *sub = NULL, *match;
+ int ret;
/*
* The buf format can be <subsystem>:<event-name>
@@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
event = NULL;
}
- return __ftrace_set_clr_event(tr, match, sub, event, set);
+ ret = __ftrace_set_clr_event(tr, match, sub, event, set);
+
+ /* Put back the colon to allow this to be called again */
+ if (buf)
+ *(buf - 1) = ':';
+
+ return ret;
}
/**
@@ -1753,6 +1760,8 @@ static void update_event_printk(struct ftrace_event_call *call,
ptr++;
/* Check for alpha chars like ULL */
} while (isalnum(*ptr));
+ if (!*ptr)
+ break;
/*
* A number must have some kind of delimiter after
* it, and we can ignore that too.
@@ -1779,12 +1788,16 @@ static void update_event_printk(struct ftrace_event_call *call,
do {
ptr++;
} while (isalnum(*ptr) || *ptr == '_');
+ if (!*ptr)
+ break;
/*
* If what comes after this variable is a '.' or
* '->' then we can continue to ignore that string.
*/
if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
ptr += *ptr == '.' ? 1 : 2;
+ if (!*ptr)
+ break;
goto skip_more;
}
/*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9cfea4c6d314..a51e79688455 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1308,15 +1308,19 @@ void graph_trace_open(struct trace_iterator *iter)
{
/* pid and depth on the last trace processed */
struct fgraph_data *data;
+ gfp_t gfpflags;
int cpu;
iter->private = NULL;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ /* We can be called in atomic context via ftrace_dump() */
+ gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+
+ data = kzalloc(sizeof(*data), gfpflags);
if (!data)
goto out_err;
- data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+ data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags);
if (!data->cpu_data)
goto out_err_free;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 692bf7184c8c..25a086bcb700 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
EXPORT_SYMBOL(ftrace_print_hex_seq);
const char *
-ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
+ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
size_t el_size)
{
const char *ret = trace_seq_buffer_ptr(p);
const char *prefix = "";
void *ptr = (void *)buf;
+ size_t buf_len = count * el_size;
trace_seq_putc(p, '{');
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d60fe62ec4fa..6dd022c7b5bc 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv)
if (ret)
goto fail_address_parse;
- inode = igrab(path.dentry->d_inode);
+ inode = igrab(d_inode(path.dentry));
path_put(&path);
if (!inode || !S_ISREG(inode->i_mode)) {
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 2316f50b07a4..581a68a04c64 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -41,6 +41,8 @@
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+static DEFINE_MUTEX(watchdog_proc_mutex);
+
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
@@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void)
{
int cpu;
- if (!watchdog_user_enabled)
- return;
+ mutex_lock(&watchdog_proc_mutex);
+
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ goto unlock;
get_online_cpus();
for_each_online_cpu(cpu)
watchdog_nmi_enable(cpu);
put_online_cpus();
+
+unlock:
+ mutex_unlock(&watchdog_proc_mutex);
}
void watchdog_nmi_disable_all(void)
{
int cpu;
+ mutex_lock(&watchdog_proc_mutex);
+
if (!watchdog_running)
- return;
+ goto unlock;
get_online_cpus();
for_each_online_cpu(cpu)
watchdog_nmi_disable(cpu);
put_online_cpus();
+
+unlock:
+ mutex_unlock(&watchdog_proc_mutex);
}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
@@ -744,8 +756,6 @@ static int proc_watchdog_update(void)
}
-static DEFINE_MUTEX(watchdog_proc_mutex);
-
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*