summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_watch.c12
-rw-r--r--kernel/fork.c19
-rw-r--r--kernel/irq/chip.c8
-rw-r--r--kernel/kprobes.c4
-rw-r--r--kernel/kthread.c8
-rw-r--r--kernel/locking/rwsem-xadd.c56
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/Kconfig1
-rw-r--r--kernel/sched/tune.c2
-rw-r--r--kernel/sys.c95
-rw-r--r--kernel/time/alarmtimer.c3
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--kernel/user_namespace.c39
-rw-r--r--kernel/utsname_sysctl.c41
17 files changed, 170 insertions, 136 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index a162661c9d60..f45a9a5d3e47 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -419,6 +419,13 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
struct path parent_path;
int h, ret = 0;
+ /*
+ * When we will be calling audit_add_to_parent, krule->watch might have
+ * been updated and watch might have been freed.
+ * So we need to keep a reference of watch.
+ */
+ audit_get_watch(watch);
+
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
@@ -427,8 +434,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- if (ret)
+ if (ret) {
+ audit_put_watch(watch);
return ret;
+ }
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));
@@ -446,6 +455,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
*list = &audit_inode_hash[h];
error:
path_put(&parent_path);
+ audit_put_watch(watch);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index caa23ca489bb..77a21c3d02e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1142,7 +1142,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
@@ -1370,6 +1372,18 @@ static struct task_struct *copy_process(unsigned long clone_flags,
cpufreq_task_times_init(p);
+ /*
+ * This _must_ happen before we call free_task(), i.e. before we jump
+ * to any of the bad_fork_* labels. This is to avoid freeing
+ * p->set_child_tid which is (ab)used as a kthread's data pointer for
+ * kernel threads (PF_KTHREAD).
+ */
+ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+ /*
+ * Clear TID on mm_release()?
+ */
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
@@ -1531,11 +1545,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
- p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
- /*
- * Clear TID on mm_release()?
- */
- p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e0449956298e..825ae448c7a9 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -338,7 +338,6 @@ void handle_nested_irq(unsigned int irq)
raw_spin_lock_irq(&desc->lock);
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
@@ -346,6 +345,7 @@ void handle_nested_irq(unsigned int irq)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock_irq(&desc->lock);
@@ -412,13 +412,13 @@ void handle_simple_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
out_unlock:
@@ -462,7 +462,6 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -473,6 +472,7 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
cond_unmask_irq(desc);
@@ -532,7 +532,6 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -544,6 +543,7 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
}
+ kstat_incr_irqs_this_cpu(desc);
if (desc->istate & IRQS_ONESHOT)
mask_irq(desc);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index bbe9dd0886bd..388bcace62f8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2441,7 +2441,7 @@ static int __init debugfs_kprobe_init(void)
if (!dir)
return -ENOMEM;
- file = debugfs_create_file("list", 0444, dir, NULL,
+ file = debugfs_create_file("list", 0400, dir, NULL,
&debugfs_kprobes_operations);
if (!file)
goto error;
@@ -2451,7 +2451,7 @@ static int __init debugfs_kprobe_init(void)
if (!file)
goto error;
- file = debugfs_create_file("blacklist", 0444, dir, NULL,
+ file = debugfs_create_file("blacklist", 0400, dir, NULL,
&debugfs_kprobe_blacklist_ops);
if (!file)
goto error;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index d9b0be5c6a5f..f529aafc1180 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -313,10 +313,16 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
+ char name[TASK_COMM_LEN];
va_list args;
va_start(args, namefmt);
- vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+ /*
+ * task is already visible to other tasks, so updating
+ * COMM must be protected.
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(task, name);
va_end(args);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 75c950ede9c7..1be33caf157d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -511,38 +511,30 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
unsigned long flags;
/*
- * If a spinner is present, there is a chance that the load of
- * rwsem_has_spinner() in rwsem_wake() can be reordered with
- * respect to decrement of rwsem count in __up_write() leading
- * to wakeup being missed.
- *
- * spinning writer up_write caller
- * --------------- -----------------------
- * [S] osq_unlock() [L] osq
- * spin_lock(wait_lock)
- * sem->count=0xFFFFFFFF00000001
- * +0xFFFFFFFF00000000
- * count=sem->count
- * MB
- * sem->count=0xFFFFFFFE00000001
- * -0xFFFFFFFF00000001
- * RMB
- * spin_trylock(wait_lock)
- * return
- * rwsem_try_write_lock(count)
- * spin_unlock(wait_lock)
- * schedule()
- *
- * Reordering of atomic_long_sub_return_release() in __up_write()
- * and rwsem_has_spinner() in rwsem_wake() can cause missing of
- * wakeup in up_write() context. In spinning writer, sem->count
- * and local variable count is 0XFFFFFFFE00000001. It would result
- * in rwsem_try_write_lock() failing to acquire rwsem and spinning
- * writer going to sleep in rwsem_down_write_failed().
- *
- * The smp_rmb() here is to make sure that the spinner state is
- * consulted after sem->count is updated in up_write context.
- */
+ * __rwsem_down_write_failed_common(sem)
+ * rwsem_optimistic_spin(sem)
+ * osq_unlock(sem->osq)
+ * ...
+ * atomic_long_add_return(&sem->count)
+ *
+ * - VS -
+ *
+ * __up_write()
+ * if (atomic_long_sub_return_release(&sem->count) < 0)
+ * rwsem_wake(sem)
+ * osq_is_locked(&sem->osq)
+ *
+ * And __up_write() must observe !osq_is_locked() when it observes the
+ * atomic_long_add_return() in order to not miss a wakeup.
+ *
+ * This boils down to:
+ *
+ * [S.rel] X = 1 [RmW] r0 = (Y += 0)
+ * MB RMB
+ * [RmW] Y += 1 [L] r1 = X
+ *
+ * exists (r0=1 /\ r1=0)
+ */
smp_rmb();
/*
diff --git a/kernel/module.c b/kernel/module.c
index a0eeedb3e5cd..57809d2b4271 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3866,7 +3866,7 @@ static unsigned long mod_find_symname(struct module *mod, const char *name)
for (i = 0; i < kallsyms->num_symtab; i++)
if (strcmp(name, symname(kallsyms, i)) == 0 &&
- kallsyms->symtab[i].st_info != 'U')
+ kallsyms->symtab[i].st_shndx != SHN_UNDEF)
return kallsyms->symtab[i].st_value;
return 0;
}
@@ -3912,6 +3912,10 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
if (mod->state == MODULE_STATE_UNFORMED)
continue;
for (i = 0; i < kallsyms->num_symtab; i++) {
+
+ if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
+ continue;
+
ret = fn(data, symname(kallsyms, i),
mod, kallsyms->symtab[i].st_value);
if (ret != 0)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6d6f63be1f9b..4335e7d1c391 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -115,6 +115,7 @@ config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
select PM
+ select SRCU
config PM_SLEEP_SMP
def_bool y
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 9c56841227cc..b84d13750604 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -718,7 +718,7 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 prefer_idle)
{
struct schedtune *st = css_st(css);
- st->prefer_idle = prefer_idle;
+ st->prefer_idle = !!prefer_idle;
return 0;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 0df4753d4969..ede0c1f4b860 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1144,18 +1144,19 @@ static int override_release(char __user *release, size_t len)
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
- int errno = 0;
+ struct new_utsname tmp;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof *name))
- errno = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!errno && override_release(name->release, sizeof(name->release)))
- errno = -EFAULT;
- if (!errno && override_architecture(name))
- errno = -EFAULT;
- return errno;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
#ifdef __ARCH_WANT_SYS_OLD_UNAME
@@ -1164,55 +1165,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
*/
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
- int error = 0;
+ struct old_utsname tmp;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof(*name)))
- error = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- return error;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
- int error;
+ struct oldold_utsname tmp = {};
if (!name)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
- return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname, &utsname()->sysname,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename, &utsname()->nodename,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->release, &utsname()->release,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->release + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->version, &utsname()->version,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->version + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine, &utsname()->machine,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->machine + __OLD_UTS_LEN);
+ memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
+ memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
+ memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
+ memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
+ memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- return error ? -EFAULT : 0;
+ if (override_architecture(name))
+ return -EFAULT;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1226,17 +1218,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->nodename, tmp, len);
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
uts_proc_notify(UTS_PROC_HOSTNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
@@ -1244,8 +1237,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
- int i, errno;
+ int i;
struct new_utsname *u;
+ char tmp[__NEW_UTS_LEN + 1];
if (len < 0)
return -EINVAL;
@@ -1254,11 +1248,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
i = 1 + strlen(u->nodename);
if (i > len)
i = len;
- errno = 0;
- if (copy_to_user(name, u->nodename, i))
- errno = -EFAULT;
+ memcpy(tmp, u->nodename, i);
up_read(&uts_sem);
- return errno;
+ if (copy_to_user(name, tmp, i))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1277,17 +1271,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->domainname, tmp, len);
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
uts_proc_notify(UTS_PROC_DOMAINNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index ceec77c652b5..271b37995a89 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -1005,7 +1005,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now = alarm_bases[type].gettime();
- exp = ktime_add(now, exp);
+
+ exp = ktime_add_safe(now, exp);
}
if (alarmtimer_do_nsleep(&alarm, exp))
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b674a7a8d655..c39fc68c4778 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1768,6 +1768,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
+ if (!!value == !!q->blk_trace) {
+ ret = 0;
+ goto out_unlock_bdev;
+ }
if (value)
ret = blk_trace_setup_queue(q, bdev);
else
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fdaa88f38aec..74b20e3ab8c6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1513,6 +1513,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
tmp_iter_page = first_page;
do {
+ cond_resched();
+
to_remove_page = tmp_iter_page;
rb_inc_page(cpu_buffer, &tmp_iter_page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b30adaae739a..1600d1d6e93f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6657,7 +6657,9 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
if (buffer) {
mutex_lock(&trace_types_lock);
- if (val) {
+ if (!!val == tracer_tracing_is_on(tr)) {
+ val = 0; /* do nothing */
+ } else if (val) {
tracer_tracing_on(tr);
if (tr->current_trace->start)
tr->current_trace->start(tr);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 64304388993e..31a436f9f13b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -969,7 +969,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
list_del_rcu(&link->list);
/* synchronize with u{,ret}probe_trace_func */
- synchronize_sched();
+ synchronize_rcu();
kfree(link);
if (!list_empty(&tu->tp.files))
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 88fefa68c516..a965df4b54f5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -602,9 +602,26 @@ static ssize_t map_write(struct file *file, const char __user *buf,
struct uid_gid_map new_map;
unsigned idx;
struct uid_gid_extent *extent = NULL;
- unsigned long page = 0;
+ unsigned long page;
char *kbuf, *pos, *next_line;
- ssize_t ret = -EINVAL;
+ ssize_t ret;
+
+ /* Only allow < page size writes at the beginning of the file */
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
+ return -EINVAL;
+
+ /* Get a buffer */
+ page = __get_free_page(GFP_TEMPORARY);
+ kbuf = (char *) page;
+ if (!page)
+ return -ENOMEM;
+
+ /* Slurp in the user data */
+ if (copy_from_user(kbuf, buf, count)) {
+ free_page(page);
+ return -EFAULT;
+ }
+ kbuf[count] = '\0';
/*
* The userns_state_mutex serializes all writes to any given map.
@@ -638,24 +655,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
goto out;
- /* Get a buffer */
- ret = -ENOMEM;
- page = __get_free_page(GFP_TEMPORARY);
- kbuf = (char *) page;
- if (!page)
- goto out;
-
- /* Only allow < page size writes at the beginning of the file */
- ret = -EINVAL;
- if ((*ppos != 0) || (count >= PAGE_SIZE))
- goto out;
-
- /* Slurp in the user data */
- ret = -EFAULT;
- if (copy_from_user(kbuf, buf, count))
- goto out;
- kbuf[count] = '\0';
-
/* Parse the user data */
ret = -EINVAL;
pos = kbuf;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index c8eac43267e9..d2b3b2973456 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -17,7 +17,7 @@
#ifdef CONFIG_PROC_SYSCTL
-static void *get_uts(struct ctl_table *table, int write)
+static void *get_uts(struct ctl_table *table)
{
char *which = table->data;
struct uts_namespace *uts_ns;
@@ -25,21 +25,9 @@ static void *get_uts(struct ctl_table *table, int write)
uts_ns = current->nsproxy->uts_ns;
which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
- if (!write)
- down_read(&uts_sem);
- else
- down_write(&uts_sem);
return which;
}
-static void put_uts(struct ctl_table *table, int write, void *which)
-{
- if (!write)
- up_read(&uts_sem);
- else
- up_write(&uts_sem);
-}
-
/*
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
@@ -49,13 +37,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
{
struct ctl_table uts_table;
int r;
+ char tmp_data[__NEW_UTS_LEN + 1];
+
memcpy(&uts_table, table, sizeof(uts_table));
- uts_table.data = get_uts(table, write);
+ uts_table.data = tmp_data;
+
+ /*
+ * Buffer the value in tmp_data so that proc_dostring() can be called
+ * without holding any locks.
+ * We also need to read the original value in the write==1 case to
+ * support partial writes.
+ */
+ down_read(&uts_sem);
+ memcpy(tmp_data, get_uts(table), sizeof(tmp_data));
+ up_read(&uts_sem);
r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
- put_uts(table, write, uts_table.data);
- if (write)
+ if (write) {
+ /*
+ * Write back the new value.
+ * Note that, since we dropped uts_sem, the result can
+ * theoretically be incorrect if there are two parallel writes
+ * at non-zero offsets to the same sysctl.
+ */
+ down_write(&uts_sem);
+ memcpy(get_uts(table), tmp_data, sizeof(tmp_data));
+ up_write(&uts_sem);
proc_sys_poll_notify(table->poll);
+ }
return r;
}