summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile22
-rw-r--r--kernel/compat.c74
-rw-r--r--kernel/debug/debug_core.c1
-rw-r--r--kernel/debug/gdbstub.c1
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/futex_compat.c21
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/kexec.c34
-rw-r--r--kernel/lockdep.c15
-rw-r--r--kernel/module.c140
-rw-r--r--kernel/nsproxy.c3
-rw-r--r--kernel/panic.c34
-rw-r--r--kernel/power/suspend_test.c11
-rw-r--r--kernel/printk.c9
-rw-r--r--kernel/sched/core.c55
-rw-r--r--kernel/signal.c332
-rw-r--r--kernel/smp.c183
-rw-r--r--kernel/sys.c293
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/time.c4
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/ntp.c26
-rw-r--r--kernel/time/timekeeping.c26
-rw-r--r--kernel/user.c4
-rw-r--r--kernel/user_namespace.c62
-rw-r--r--kernel/watchdog.c10
26 files changed, 806 insertions, 571 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c072b6da239..eceac38f3c65 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -153,23 +153,7 @@ kernel/modsign_certificate.o: signing_key.x509 extra_certificates
# fail and that the kernel may be used afterwards.
#
###############################################################################
-sign_key_with_hash :=
-ifeq ($(CONFIG_MODULE_SIG_SHA1),y)
-sign_key_with_hash := -sha1
-endif
-ifeq ($(CONFIG_MODULE_SIG_SHA224),y)
-sign_key_with_hash := -sha224
-endif
-ifeq ($(CONFIG_MODULE_SIG_SHA256),y)
-sign_key_with_hash := -sha256
-endif
-ifeq ($(CONFIG_MODULE_SIG_SHA384),y)
-sign_key_with_hash := -sha384
-endif
-ifeq ($(CONFIG_MODULE_SIG_SHA512),y)
-sign_key_with_hash := -sha512
-endif
-ifeq ($(sign_key_with_hash),)
+ifndef CONFIG_MODULE_SIG_HASH
$(error Could not determine digest type to use from kernel config)
endif
@@ -182,8 +166,8 @@ signing_key.priv signing_key.x509: x509.genkey
@echo "### needs to be run as root, and uses a hardware random"
@echo "### number generator if one is available."
@echo "###"
- openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
- -x509 -config x509.genkey \
+ openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \
+ -batch -x509 -config x509.genkey \
-outform DER -out signing_key.x509 \
-keyout signing_key.priv
@echo "###"
diff --git a/kernel/compat.c b/kernel/compat.c
index 36700e9e2be9..19971d8c7299 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -290,8 +290,8 @@ static inline long put_compat_itimerval(struct compat_itimerval __user *o,
__put_user(i->it_value.tv_usec, &o->it_value.tv_usec)));
}
-asmlinkage long compat_sys_getitimer(int which,
- struct compat_itimerval __user *it)
+COMPAT_SYSCALL_DEFINE2(getitimer, int, which,
+ struct compat_itimerval __user *, it)
{
struct itimerval kit;
int error;
@@ -302,9 +302,9 @@ asmlinkage long compat_sys_getitimer(int which,
return error;
}
-asmlinkage long compat_sys_setitimer(int which,
- struct compat_itimerval __user *in,
- struct compat_itimerval __user *out)
+COMPAT_SYSCALL_DEFINE3(setitimer, int, which,
+ struct compat_itimerval __user *, in,
+ struct compat_itimerval __user *, out)
{
struct itimerval kin, kout;
int error;
@@ -381,9 +381,9 @@ static inline void compat_sig_setmask(sigset_t *blocked, compat_sigset_word set)
memcpy(blocked->sig, &set, sizeof(set));
}
-asmlinkage long compat_sys_sigprocmask(int how,
- compat_old_sigset_t __user *nset,
- compat_old_sigset_t __user *oset)
+COMPAT_SYSCALL_DEFINE3(sigprocmask, int, how,
+ compat_old_sigset_t __user *, nset,
+ compat_old_sigset_t __user *, oset)
{
old_sigset_t old_set, new_set;
sigset_t new_blocked;
@@ -593,7 +593,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
else
ret = put_compat_rusage(&ru, uru);
if (ret)
- return ret;
+ return -EFAULT;
}
BUG_ON(info.si_code & __SI_MASK);
@@ -971,7 +971,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
}
void
-sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
+sigset_from_compat(sigset_t *set, const compat_sigset_t *compat)
{
switch (_NSIG_WORDS) {
case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
@@ -982,10 +982,20 @@ sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
}
EXPORT_SYMBOL_GPL(sigset_from_compat);
-asmlinkage long
-compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
- struct compat_siginfo __user *uinfo,
- struct compat_timespec __user *uts, compat_size_t sigsetsize)
+void
+sigset_to_compat(compat_sigset_t *compat, const sigset_t *set)
+{
+ switch (_NSIG_WORDS) {
+ case 4: compat->sig[7] = (set->sig[3] >> 32); compat->sig[6] = set->sig[3];
+ case 3: compat->sig[5] = (set->sig[2] >> 32); compat->sig[4] = set->sig[2];
+ case 2: compat->sig[3] = (set->sig[1] >> 32); compat->sig[2] = set->sig[1];
+ case 1: compat->sig[1] = (set->sig[0] >> 32); compat->sig[0] = set->sig[0];
+ }
+}
+
+COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
+ struct compat_siginfo __user *, uinfo,
+ struct compat_timespec __user *, uts, compat_size_t, sigsetsize)
{
compat_sigset_t s32;
sigset_t s;
@@ -1013,18 +1023,6 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
}
return ret;
-
-}
-
-asmlinkage long
-compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
- struct compat_siginfo __user *uinfo)
-{
- siginfo_t info;
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
}
#ifdef __ARCH_WANT_COMPAT_SYS_TIME
@@ -1067,23 +1065,6 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
#endif /* __ARCH_WANT_COMPAT_SYS_TIME */
-#ifdef __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
-asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize)
-{
- sigset_t newset;
- compat_sigset_t newset32;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
-
- if (copy_from_user(&newset32, unewset, sizeof(compat_sigset_t)))
- return -EFAULT;
- sigset_from_compat(&newset, &newset32);
- return sigsuspend(&newset);
-}
-#endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */
-
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
{
struct timex txc;
@@ -1222,9 +1203,9 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
return 0;
}
-#ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
-asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval)
+COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
+ compat_pid_t, pid,
+ struct compat_timespec __user *, interval)
{
struct timespec t;
int ret;
@@ -1237,7 +1218,6 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
return -EFAULT;
return ret;
}
-#endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */
/*
* Allocate user-space memory for the duration of a single system call,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 9a61738cefc8..c26278fd4851 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -29,6 +29,7 @@
*/
#include <linux/pid_namespace.h>
#include <linux/clocksource.h>
+#include <linux/serial_core.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/console.h>
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index ce615e064482..38573f35a5ad 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
+#include <linux/serial_core.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 9618b6e9fb36..fbc07a29ec53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
if (!futex_cmpxchg_enabled)
return -ENOSYS;
- WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
rcu_read_lock();
ret = -ESRCH;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 83e368b005fc..f9f44fd4d34d 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -11,6 +11,7 @@
#include <linux/nsproxy.h>
#include <linux/futex.h>
#include <linux/ptrace.h>
+#include <linux/syscalls.h>
#include <asm/uaccess.h>
@@ -116,9 +117,9 @@ void compat_exit_robust_list(struct task_struct *curr)
}
}
-asmlinkage long
-compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
- compat_size_t len)
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+ struct compat_robust_list_head __user *, head,
+ compat_size_t, len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -131,9 +132,9 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
return 0;
}
-asmlinkage long
-compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
- compat_size_t __user *len_ptr)
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+ compat_uptr_t __user *, head_ptr,
+ compat_size_t __user *, len_ptr)
{
struct compat_robust_list_head __user *head;
unsigned long ret;
@@ -142,8 +143,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
if (!futex_cmpxchg_enabled)
return -ENOSYS;
- WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
rcu_read_lock();
ret = -ESRCH;
@@ -172,9 +171,9 @@ err_unlock:
return ret;
}
-asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
- struct compat_timespec __user *utime, u32 __user *uaddr2,
- u32 val3)
+COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+ struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
{
struct timespec ts;
ktime_t t, *tp = NULL;
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index a92028196cc1..d4da55d1fb65 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@ config GCOV_KERNEL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
+ depends on SUPERH || S390 || X86 || PPC || MICROBLAZE
default n
---help---
This options activates profiling for the entire kernel.
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5e4bd7864c5d..2436ffcec91f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -54,6 +54,12 @@ struct resource crashk_res = {
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
+struct resource crashk_low_res = {
+ .name = "Crash kernel low",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
int kexec_should_crash(struct task_struct *p)
{
@@ -1369,10 +1375,11 @@ static int __init parse_crashkernel_simple(char *cmdline,
* That function is the entry point for command line parsing and should be
* called from the arch-specific code.
*/
-int __init parse_crashkernel(char *cmdline,
+static int __init __parse_crashkernel(char *cmdline,
unsigned long long system_ram,
unsigned long long *crash_size,
- unsigned long long *crash_base)
+ unsigned long long *crash_base,
+ const char *name)
{
char *p = cmdline, *ck_cmdline = NULL;
char *first_colon, *first_space;
@@ -1382,16 +1389,16 @@ int __init parse_crashkernel(char *cmdline,
*crash_base = 0;
/* find crashkernel and use the last one if there are more */
- p = strstr(p, "crashkernel=");
+ p = strstr(p, name);
while (p) {
ck_cmdline = p;
- p = strstr(p+1, "crashkernel=");
+ p = strstr(p+1, name);
}
if (!ck_cmdline)
return -EINVAL;
- ck_cmdline += 12; /* strlen("crashkernel=") */
+ ck_cmdline += strlen(name);
/*
* if the commandline contains a ':', then that's the extended
@@ -1409,6 +1416,23 @@ int __init parse_crashkernel(char *cmdline,
return 0;
}
+int __init parse_crashkernel(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+ "crashkernel=");
+}
+
+int __init parse_crashkernel_low(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+ "crashkernel_low=");
+}
static void update_vmcoreinfo_note(void)
{
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 7981e5b2350d..8a0efac4f99d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
#endif
if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
debug_locks_off();
- printk("BUG: MAX_LOCK_DEPTH too low!\n");
+ printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
+ curr->lockdep_depth, MAX_LOCK_DEPTH);
printk("turning off the locking correctness validator.\n");
+
+ lockdep_print_held_locks(current);
+ debug_show_all_locks();
dump_stack();
+
return 0;
}
@@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
}
static int
-print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
unsigned long ip)
{
if (!debug_locks_off())
@@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
return 0;
if (curr->lockdep_depth <= 0)
- return print_unlock_inbalance_bug(curr, lock, ip);
+ return print_unlock_imbalance_bug(curr, lock, ip);
return 1;
}
@@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
goto found_it;
prev_hlock = hlock;
}
- return print_unlock_inbalance_bug(curr, lock, ip);
+ return print_unlock_imbalance_bug(curr, lock, ip);
found_it:
lockdep_init_map(lock, name, key, 0);
@@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr,
goto found_it;
prev_hlock = hlock;
}
- return print_unlock_inbalance_bug(curr, lock, ip);
+ return print_unlock_imbalance_bug(curr, lock, ip);
found_it:
if (hlock->instance == lock)
diff --git a/kernel/module.c b/kernel/module.c
index eab08274ec9b..921bed4794e9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -197,9 +197,10 @@ static inline int strong_try_module_get(struct module *mod)
return -ENOENT;
}
-static inline void add_taint_module(struct module *mod, unsigned flag)
+static inline void add_taint_module(struct module *mod, unsigned flag,
+ enum lockdep_ok lockdep_ok)
{
- add_taint(flag);
+ add_taint(flag, lockdep_ok);
mod->taints |= (1U << flag);
}
@@ -727,7 +728,7 @@ static inline int try_force_unload(unsigned int flags)
{
int ret = (flags & O_TRUNC);
if (ret)
- add_taint(TAINT_FORCED_RMMOD);
+ add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE);
return ret;
}
#else
@@ -1138,7 +1139,7 @@ static int try_to_force_load(struct module *mod, const char *reason)
if (!test_taint(TAINT_FORCED_MODULE))
printk(KERN_WARNING "%s: %s: kernel tainted.\n",
mod->name, reason);
- add_taint_module(mod, TAINT_FORCED_MODULE);
+ add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE);
return 0;
#else
return -ENOEXEC;
@@ -2147,7 +2148,8 @@ static void set_license(struct module *mod, const char *license)
if (!test_taint(TAINT_PROPRIETARY_MODULE))
printk(KERN_WARNING "%s: module license '%s' taints "
"kernel.\n", mod->name, license);
- add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+ LOCKDEP_NOW_UNRELIABLE);
}
}
@@ -2700,10 +2702,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
}
if (!get_modinfo(info, "intree"))
- add_taint_module(mod, TAINT_OOT_MODULE);
+ add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
if (get_modinfo(info, "staging")) {
- add_taint_module(mod, TAINT_CRAP);
+ add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
printk(KERN_WARNING "%s: module is from the staging directory,"
" the quality is unknown, you have been warned.\n",
mod->name);
@@ -2869,15 +2871,17 @@ static int check_module_license_and_versions(struct module *mod)
* using GPL-only symbols it needs.
*/
if (strcmp(mod->name, "ndiswrapper") == 0)
- add_taint(TAINT_PROPRIETARY_MODULE);
+ add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE);
/* driverloader was caught wrongly pretending to be under GPL */
if (strcmp(mod->name, "driverloader") == 0)
- add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+ LOCKDEP_NOW_UNRELIABLE);
/* lve claims to be GPL but upstream won't provide source */
if (strcmp(mod->name, "lve") == 0)
- add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+ LOCKDEP_NOW_UNRELIABLE);
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !mod->crcs)
@@ -3141,12 +3145,72 @@ static int may_init_module(void)
return 0;
}
+/*
+ * We try to place it in the list now to make sure it's unique before
+ * we dedicate too many resources. In particular, temporary percpu
+ * memory exhaustion.
+ */
+static int add_unformed_module(struct module *mod)
+{
+ int err;
+ struct module *old;
+
+ mod->state = MODULE_STATE_UNFORMED;
+
+again:
+ mutex_lock(&module_mutex);
+ if ((old = find_module_all(mod->name, true)) != NULL) {
+ if (old->state == MODULE_STATE_COMING
+ || old->state == MODULE_STATE_UNFORMED) {
+ /* Wait in case it fails to load. */
+ mutex_unlock(&module_mutex);
+ err = wait_event_interruptible(module_wq,
+ finished_loading(mod->name));
+ if (err)
+ goto out_unlocked;
+ goto again;
+ }
+ err = -EEXIST;
+ goto out;
+ }
+ list_add_rcu(&mod->list, &modules);
+ err = 0;
+
+out:
+ mutex_unlock(&module_mutex);
+out_unlocked:
+ return err;
+}
+
+static int complete_formation(struct module *mod, struct load_info *info)
+{
+ int err;
+
+ mutex_lock(&module_mutex);
+
+ /* Find duplicate symbols (must be called under lock). */
+ err = verify_export_symbols(mod);
+ if (err < 0)
+ goto out;
+
+ /* This relies on module_mutex for list integrity. */
+ module_bug_finalize(info->hdr, info->sechdrs, mod);
+
+ /* Mark state as coming so strong_try_module_get() ignores us,
+ * but kallsyms etc. can see us. */
+ mod->state = MODULE_STATE_COMING;
+
+out:
+ mutex_unlock(&module_mutex);
+ return err;
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static int load_module(struct load_info *info, const char __user *uargs,
int flags)
{
- struct module *mod, *old;
+ struct module *mod;
long err;
err = module_sig_check(info);
@@ -3164,36 +3228,20 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_copy;
}
- /*
- * We try to place it in the list now to make sure it's unique
- * before we dedicate too many resources. In particular,
- * temporary percpu memory exhaustion.
- */
- mod->state = MODULE_STATE_UNFORMED;
-again:
- mutex_lock(&module_mutex);
- if ((old = find_module_all(mod->name, true)) != NULL) {
- if (old->state == MODULE_STATE_COMING
- || old->state == MODULE_STATE_UNFORMED) {
- /* Wait in case it fails to load. */
- mutex_unlock(&module_mutex);
- err = wait_event_interruptible(module_wq,
- finished_loading(mod->name));
- if (err)
- goto free_module;
- goto again;
- }
- err = -EEXIST;
- mutex_unlock(&module_mutex);
+ /* Reserve our place in the list. */
+ err = add_unformed_module(mod);
+ if (err)
goto free_module;
- }
- list_add_rcu(&mod->list, &modules);
- mutex_unlock(&module_mutex);
#ifdef CONFIG_MODULE_SIG
mod->sig_ok = info->sig_ok;
- if (!mod->sig_ok)
- add_taint_module(mod, TAINT_FORCED_MODULE);
+ if (!mod->sig_ok) {
+ printk_once(KERN_NOTICE
+ "%s: module verification failed: signature and/or"
+ " required key missing - tainting kernel\n",
+ mod->name);
+ add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_STILL_OK);
+ }
#endif
/* Now module is in final location, initialize linked lists, etc. */
@@ -3236,21 +3284,11 @@ again:
dynamic_debug_setup(info->debug, info->num_debug);
- mutex_lock(&module_mutex);
- /* Find duplicate symbols (must be called under lock). */
- err = verify_export_symbols(mod);
- if (err < 0)
+ /* Finally it's fully formed, ready to start executing. */
+ err = complete_formation(mod, info);
+ if (err)
goto ddebug_cleanup;
- /* This relies on module_mutex for list integrity. */
- module_bug_finalize(info->hdr, info->sechdrs, mod);
-
- /* Mark state as coming so strong_try_module_get() ignores us,
- * but kallsyms etc. can see us. */
- mod->state = MODULE_STATE_COMING;
-
- mutex_unlock(&module_mutex);
-
/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, &ddebug_dyndbg_module_param_cb);
@@ -3274,8 +3312,8 @@ again:
/* module_bug_cleanup needs module_mutex protection */
mutex_lock(&module_mutex);
module_bug_cleanup(mod);
- ddebug_cleanup:
mutex_unlock(&module_mutex);
+ ddebug_cleanup:
dynamic_debug_remove(info->debug);
synchronize_sched();
kfree(mod->args);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 78e2ecb20165..b781e66a8f2c 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -153,8 +153,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
goto out;
}
- new_ns = create_new_namespaces(flags, tsk,
- task_cred_xxx(tsk, user_ns), tsk->fs);
+ new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
if (IS_ERR(new_ns)) {
err = PTR_ERR(new_ns);
goto out;
diff --git a/kernel/panic.c b/kernel/panic.c
index e1b2822fff97..7c57cc9eee2c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -259,26 +259,19 @@ unsigned long get_taint(void)
return tainted_mask;
}
-void add_taint(unsigned flag)
+/**
+ * add_taint: add a taint flag if not already set.
+ * @flag: one of the TAINT_* constants.
+ * @lockdep_ok: whether lock debugging is still OK.
+ *
+ * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for
+ * some notewortht-but-not-corrupting cases, it can be set to true.
+ */
+void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
{
- /*
- * Can't trust the integrity of the kernel anymore.
- * We don't call directly debug_locks_off() because the issue
- * is not necessarily serious enough to set oops_in_progress to 1
- * Also we want to keep up lockdep for staging/out-of-tree
- * development and post-warning case.
- */
- switch (flag) {
- case TAINT_CRAP:
- case TAINT_OOT_MODULE:
- case TAINT_WARN:
- case TAINT_FIRMWARE_WORKAROUND:
- break;
-
- default:
- if (__debug_locks_off())
- printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
- }
+ if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
+ printk(KERN_WARNING
+ "Disabling lock debugging due to kernel taint\n");
set_bit(flag, &tainted_mask);
}
@@ -421,7 +414,8 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
print_modules();
dump_stack();
print_oops_end_marker();
- add_taint(taint);
+ /* Just a warning, don't kill lockdep. */
+ add_taint(taint, LOCKDEP_STILL_OK);
}
void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 25596e450ac7..9b2a1d58558d 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -112,7 +112,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
rtc_set_alarm(rtc, &alm);
}
-static int __init has_wakealarm(struct device *dev, void *name_ptr)
+static int __init has_wakealarm(struct device *dev, const void *data)
{
struct rtc_device *candidate = to_rtc_device(dev);
@@ -121,7 +121,6 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr)
if (!device_may_wakeup(candidate->dev.parent))
return 0;
- *(const char **)name_ptr = dev_name(dev);
return 1;
}
@@ -159,8 +158,8 @@ static int __init test_suspend(void)
static char warn_no_rtc[] __initdata =
KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
- char *pony = NULL;
struct rtc_device *rtc = NULL;
+ struct device *dev;
/* PM is initialized by now; is that state testable? */
if (test_state == PM_SUSPEND_ON)
@@ -171,9 +170,9 @@ static int __init test_suspend(void)
}
/* RTCs have initialized by now too ... can we use one? */
- class_find_device(rtc_class, NULL, &pony, has_wakealarm);
- if (pony)
- rtc = rtc_class_open(pony);
+ dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm);
+ if (dev)
+ rtc = rtc_class_open(dev_name(dev));
if (!rtc) {
printk(warn_no_rtc);
goto done;
diff --git a/kernel/printk.c b/kernel/printk.c
index f24633afa46a..0b31715f335a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -88,6 +88,12 @@ static DEFINE_SEMAPHORE(console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map console_lock_dep_map = {
+ .name = "console_lock"
+};
+#endif
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -1919,6 +1925,7 @@ void console_lock(void)
return;
console_locked = 1;
console_may_schedule = 1;
+ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(console_lock);
@@ -1940,6 +1947,7 @@ int console_trylock(void)
}
console_locked = 1;
console_may_schedule = 0;
+ mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
}
EXPORT_SYMBOL(console_trylock);
@@ -2102,6 +2110,7 @@ skip:
local_irq_restore(flags);
}
console_locked = 0;
+ mutex_release(&console_lock_dep_map, 1, _RET_IP_);
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a673a3b0c6b..fc9103e9ff03 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process);
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+ int nid = cpu_to_node(cpu);
+ const struct cpumask *nodemask = NULL;
enum { cpuset, possible, fail } state = cpuset;
int dest_cpu;
- /* Look for allowed, online CPU in same node. */
- for_each_cpu(dest_cpu, nodemask) {
- if (!cpu_online(dest_cpu))
- continue;
- if (!cpu_active(dest_cpu))
- continue;
- if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
- return dest_cpu;
+ /*
+ * If the node that the cpu is on has been offlined, cpu_to_node()
+ * will return -1. There is no cpu on the node, and we should
+ * select the cpu on the other node.
+ */
+ if (nid != -1) {
+ nodemask = cpumask_of_node(nid);
+
+ /* Look for allowed, online CPU in same node. */
+ for_each_cpu(dest_cpu, nodemask) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
+ if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+ return dest_cpu;
+ }
}
for (;;) {
@@ -2786,7 +2796,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
if (irqs_disabled())
print_irqtrace_events(prev);
dump_stack();
- add_taint(TAINT_WARN);
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
/*
@@ -4364,7 +4374,10 @@ EXPORT_SYMBOL(yield);
* It's the caller's job to ensure that the target task struct
* can't go away on us before we can do any checks.
*
- * Returns true if we indeed boosted the target task.
+ * Returns:
+ * true (>0) if we indeed boosted the target task.
+ * false (0) if we failed to boost the target.
+ * -ESRCH if there's no task to yield to.
*/
bool __sched yield_to(struct task_struct *p, bool preempt)
{
@@ -4378,6 +4391,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
again:
p_rq = task_rq(p);
+ /*
+ * If we're the only runnable task on the rq and target rq also
+ * has only one task, there's absolutely no point in yielding.
+ */
+ if (rq->nr_running == 1 && p_rq->nr_running == 1) {
+ yielded = -ESRCH;
+ goto out_irq;
+ }
+
double_rq_lock(rq, p_rq);
while (task_rq(p) != p_rq) {
double_rq_unlock(rq, p_rq);
@@ -4385,13 +4407,13 @@ again:
}
if (!curr->sched_class->yield_to_task)
- goto out;
+ goto out_unlock;
if (curr->sched_class != p->sched_class)
- goto out;
+ goto out_unlock;
if (task_running(p_rq, p) || p->state)
- goto out;
+ goto out_unlock;
yielded = curr->sched_class->yield_to_task(rq, p, preempt);
if (yielded) {
@@ -4404,11 +4426,12 @@ again:
resched_task(p_rq->curr);
}
-out:
+out_unlock:
double_rq_unlock(rq, p_rq);
+out_irq:
local_irq_restore(flags);
- if (yielded)
+ if (yielded > 0)
schedule();
return yielded;
diff --git a/kernel/signal.c b/kernel/signal.c
index 7f82adbad480..2a7ae2963185 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2399,6 +2399,15 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
tracehook_signal_handler(sig, info, ka, regs, stepping);
}
+void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
+{
+ if (failed)
+ force_sigsegv(ksig->sig, current);
+ else
+ signal_delivered(ksig->sig, &ksig->info, &ksig->ka,
+ signal_pt_regs(), stepping);
+}
+
/*
* It could be that complete_signal() picked us to notify about the
* group-wide signal. Other threads should be notified now to take
@@ -2616,28 +2625,58 @@ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
return 0;
}
-long do_sigpending(void __user *set, unsigned long sigsetsize)
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
+ compat_sigset_t __user *, oset, compat_size_t, sigsetsize)
{
- long error = -EINVAL;
- sigset_t pending;
+#ifdef __BIG_ENDIAN
+ sigset_t old_set = current->blocked;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (nset) {
+ compat_sigset_t new32;
+ sigset_t new_set;
+ int error;
+ if (copy_from_user(&new32, nset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+
+ sigset_from_compat(&new_set, &new32);
+ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+ error = sigprocmask(how, &new_set, NULL);
+ if (error)
+ return error;
+ }
+ if (oset) {
+ compat_sigset_t old32;
+ sigset_to_compat(&old32, &old_set);
+ if (copy_to_user(oset, &old_set, sizeof(sigset_t)))
+ return -EFAULT;
+ }
+ return 0;
+#else
+ return sys_rt_sigprocmask(how, (sigset_t __user *)nset,
+ (sigset_t __user *)oset, sigsetsize);
+#endif
+}
+#endif
+static int do_sigpending(void *set, unsigned long sigsetsize)
+{
if (sigsetsize > sizeof(sigset_t))
- goto out;
+ return -EINVAL;
spin_lock_irq(&current->sighand->siglock);
- sigorsets(&pending, &current->pending.signal,
+ sigorsets(set, &current->pending.signal,
&current->signal->shared_pending.signal);
spin_unlock_irq(&current->sighand->siglock);
/* Outside the lock because only this thread touches it. */
- sigandsets(&pending, &current->blocked, &pending);
-
- error = -EFAULT;
- if (!copy_to_user(set, &pending, sigsetsize))
- error = 0;
-
-out:
- return error;
+ sigandsets(set, &current->blocked, set);
+ return 0;
}
/**
@@ -2646,10 +2685,35 @@ out:
* @set: stores pending signals
* @sigsetsize: size of sigset_t type or larger
*/
-SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
+SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
- return do_sigpending(set, sigsetsize);
+ sigset_t set;
+ int err = do_sigpending(&set, sigsetsize);
+ if (!err && copy_to_user(uset, &set, sigsetsize))
+ err = -EFAULT;
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
+ compat_size_t, sigsetsize)
+{
+#ifdef __BIG_ENDIAN
+ sigset_t set;
+ int err = do_sigpending(&set, sigsetsize);
+ if (!err) {
+ compat_sigset_t set32;
+ sigset_to_compat(&set32, &set);
+ /* we can get here only if sigsetsize <= sizeof(set) */
+ if (copy_to_user(uset, &set32, sigsetsize))
+ err = -EFAULT;
+ }
+ return err;
+#else
+ return sys_rt_sigpending((sigset_t __user *)uset, sigsetsize);
+#endif
}
+#endif
#ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
@@ -2927,6 +2991,22 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
return do_tkill(0, pid, sig);
}
+static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info)
+{
+ /* Not even root can pretend to send signals from the kernel.
+ * Nor can they impersonate a kill()/tgkill(), which adds source info.
+ */
+ if (info->si_code >= 0 || info->si_code == SI_TKILL) {
+ /* We used to allow any < 0 si_code */
+ WARN_ON_ONCE(info->si_code < 0);
+ return -EPERM;
+ }
+ info->si_signo = sig;
+
+ /* POSIX.1b doesn't mention process groups. */
+ return kill_proc_info(sig, info, pid);
+}
+
/**
* sys_rt_sigqueueinfo - send signal information to a signal
* @pid: the PID of the thread
@@ -2937,25 +3017,26 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
siginfo_t __user *, uinfo)
{
siginfo_t info;
-
if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
return -EFAULT;
+ return do_rt_sigqueueinfo(pid, sig, &info);
+}
- /* Not even root can pretend to send signals from the kernel.
- * Nor can they impersonate a kill()/tgkill(), which adds source info.
- */
- if (info.si_code >= 0 || info.si_code == SI_TKILL) {
- /* We used to allow any < 0 si_code */
- WARN_ON_ONCE(info.si_code < 0);
- return -EPERM;
- }
- info.si_signo = sig;
-
- /* POSIX.1b doesn't mention process groups. */
- return kill_proc_info(sig, &info, pid);
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
+ compat_pid_t, pid,
+ int, sig,
+ struct compat_siginfo __user *, uinfo)
+{
+ siginfo_t info;
+ int ret = copy_siginfo_from_user32(&info, uinfo);
+ if (unlikely(ret))
+ return ret;
+ return do_rt_sigqueueinfo(pid, sig, &info);
}
+#endif
-long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
{
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
@@ -2985,6 +3066,21 @@ SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
}
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
+ compat_pid_t, tgid,
+ compat_pid_t, pid,
+ int, sig,
+ struct compat_siginfo __user *, uinfo)
+{
+ siginfo_t info;
+
+ if (copy_siginfo_from_user32(&info, uinfo))
+ return -EFAULT;
+ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+#endif
+
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct task_struct *t = current;
@@ -3030,7 +3126,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
return 0;
}
-int
+static int
do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
{
stack_t oss;
@@ -3095,12 +3191,10 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
out:
return error;
}
-#ifdef CONFIG_GENERIC_SIGALTSTACK
SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
{
return do_sigaltstack(uss, uoss, current_user_stack_pointer());
}
-#endif
int restore_altstack(const stack_t __user *uss)
{
@@ -3118,7 +3212,6 @@ int __save_altstack(stack_t __user *uss, unsigned long sp)
}
#ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_SIGALTSTACK
COMPAT_SYSCALL_DEFINE2(sigaltstack,
const compat_stack_t __user *, uss_ptr,
compat_stack_t __user *, uoss_ptr)
@@ -3168,7 +3261,6 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
__put_user(t->sas_ss_size, &uss->ss_size);
}
#endif
-#endif
#ifdef __ARCH_WANT_SYS_SIGPENDING
@@ -3178,7 +3270,7 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
*/
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
{
- return do_sigpending(set, sizeof(*set));
+ return sys_rt_sigpending((sigset_t __user *)set, sizeof(old_sigset_t));
}
#endif
@@ -3234,7 +3326,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
}
#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
-#ifdef __ARCH_WANT_SYS_RT_SIGACTION
+#ifndef CONFIG_ODD_RT_SIGACTION
/**
* sys_rt_sigaction - alter an action taken by a process
* @sig: signal to be sent
@@ -3268,7 +3360,132 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig,
out:
return ret;
}
-#endif /* __ARCH_WANT_SYS_RT_SIGACTION */
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
+ const struct compat_sigaction __user *, act,
+ struct compat_sigaction __user *, oact,
+ compat_size_t, sigsetsize)
+{
+ struct k_sigaction new_ka, old_ka;
+ compat_sigset_t mask;
+#ifdef __ARCH_HAS_SA_RESTORER
+ compat_uptr_t restorer;
+#endif
+ int ret;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+
+ if (act) {
+ compat_uptr_t handler;
+ ret = get_user(handler, &act->sa_handler);
+ new_ka.sa.sa_handler = compat_ptr(handler);
+#ifdef __ARCH_HAS_SA_RESTORER
+ ret |= get_user(restorer, &act->sa_restorer);
+ new_ka.sa.sa_restorer = compat_ptr(restorer);
+#endif
+ ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask));
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ if (ret)
+ return -EFAULT;
+ sigset_from_compat(&new_ka.sa.sa_mask, &mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+ if (!ret && oact) {
+ sigset_to_compat(&mask, &old_ka.sa.sa_mask);
+ ret = put_user(ptr_to_compat(old_ka.sa.sa_handler),
+ &oact->sa_handler);
+ ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask));
+ ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+#ifdef __ARCH_HAS_SA_RESTORER
+ ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+ &oact->sa_restorer);
+#endif
+ }
+ return ret;
+}
+#endif
+#endif /* !CONFIG_ODD_RT_SIGACTION */
+
+#ifdef CONFIG_OLD_SIGACTION
+SYSCALL_DEFINE3(sigaction, int, sig,
+ const struct old_sigaction __user *, act,
+ struct old_sigaction __user *, oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if (act) {
+ old_sigset_t mask;
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+ __get_user(mask, &act->sa_mask))
+ return -EFAULT;
+#ifdef __ARCH_HAS_KA_RESTORER
+ new_ka.ka_restorer = NULL;
+#endif
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+ return -EFAULT;
+ }
+
+ return ret;
+}
+#endif
+#ifdef CONFIG_COMPAT_OLD_SIGACTION
+COMPAT_SYSCALL_DEFINE3(sigaction, int, sig,
+ const struct compat_old_sigaction __user *, act,
+ struct compat_old_sigaction __user *, oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+ compat_old_sigset_t mask;
+ compat_uptr_t handler, restorer;
+
+ if (act) {
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(handler, &act->sa_handler) ||
+ __get_user(restorer, &act->sa_restorer) ||
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+ __get_user(mask, &act->sa_mask))
+ return -EFAULT;
+
+#ifdef __ARCH_HAS_KA_RESTORER
+ new_ka.ka_restorer = NULL;
+#endif
+ new_ka.sa.sa_handler = compat_ptr(handler);
+ new_ka.sa.sa_restorer = compat_ptr(restorer);
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+ &oact->sa_handler) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+ &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+ return -EFAULT;
+ }
+ return ret;
+}
+#endif
#ifdef __ARCH_WANT_SYS_SGETMASK
@@ -3336,7 +3553,6 @@ int sigsuspend(sigset_t *set)
return -ERESTARTNOHAND;
}
-#ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
/**
* sys_rt_sigsuspend - replace the signal mask for a value with the
* @unewset value until a signal is received
@@ -3355,7 +3571,45 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
return -EFAULT;
return sigsuspend(&newset);
}
-#endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize)
+{
+#ifdef __BIG_ENDIAN
+ sigset_t newset;
+ compat_sigset_t newset32;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&newset32, unewset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ sigset_from_compat(&newset, &newset32);
+ return sigsuspend(&newset);
+#else
+ /* on little-endian bitmaps don't care about granularity */
+ return sys_rt_sigsuspend((sigset_t __user *)unewset, sigsetsize);
+#endif
+}
+#endif
+
+#ifdef CONFIG_OLD_SIGSUSPEND
+SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask)
+{
+ sigset_t blocked;
+ siginitset(&blocked, mask);
+ return sigsuspend(&blocked);
+}
+#endif
+#ifdef CONFIG_OLD_SIGSUSPEND3
+SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask)
+{
+ sigset_t blocked;
+ siginitset(&blocked, mask);
+ return sigsuspend(&blocked);
+}
+#endif
__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
{
diff --git a/kernel/smp.c b/kernel/smp.c
index 69f38bd98b42..8e451f3ff51b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,22 +16,12 @@
#include "smpboot.h"
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
-static struct {
- struct list_head queue;
- raw_spinlock_t lock;
-} call_function __cacheline_aligned_in_smp =
- {
- .queue = LIST_HEAD_INIT(call_function.queue),
- .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
- };
-
enum {
CSD_FLAG_LOCK = 0x01,
};
struct call_function_data {
- struct call_single_data csd;
- atomic_t refs;
+ struct call_single_data __percpu *csd;
cpumask_var_t cpumask;
cpumask_var_t cpumask_ipi;
};
@@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
cpu_to_node(cpu)))
return notifier_from_errno(-ENOMEM);
+ cfd->csd = alloc_percpu(struct call_single_data);
+ if (!cfd->csd) {
+ free_cpumask_var(cfd->cpumask);
+ return notifier_from_errno(-ENOMEM);
+ }
break;
#ifdef CONFIG_HOTPLUG_CPU
@@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD_FROZEN:
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
+ free_percpu(cfd->csd);
break;
#endif
};
@@ -171,85 +167,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
}
/*
- * Invoked by arch to handle an IPI for call function. Must be called with
- * interrupts disabled.
- */
-void generic_smp_call_function_interrupt(void)
-{
- struct call_function_data *data;
- int cpu = smp_processor_id();
-
- /*
- * Shouldn't receive this interrupt on a cpu that is not yet online.
- */
- WARN_ON_ONCE(!cpu_online(cpu));
-
- /*
- * Ensure entry is visible on call_function_queue after we have
- * entered the IPI. See comment in smp_call_function_many.
- * If we don't have this, then we may miss an entry on the list
- * and never get another IPI to process it.
- */
- smp_mb();
-
- /*
- * It's ok to use list_for_each_rcu() here even though we may
- * delete 'pos', since list_del_rcu() doesn't clear ->next
- */
- list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
- int refs;
- smp_call_func_t func;
-
- /*
- * Since we walk the list without any locks, we might
- * see an entry that was completed, removed from the
- * list and is in the process of being reused.
- *
- * We must check that the cpu is in the cpumask before
- * checking the refs, and both must be set before
- * executing the callback on this cpu.
- */
-
- if (!cpumask_test_cpu(cpu, data->cpumask))
- continue;
-
- smp_rmb();
-
- if (atomic_read(&data->refs) == 0)
- continue;
-
- func = data->csd.func; /* save for later warn */
- func(data->csd.info);
-
- /*
- * If the cpu mask is not still set then func enabled
- * interrupts (BUG), and this cpu took another smp call
- * function interrupt and executed func(info) twice
- * on this cpu. That nested execution decremented refs.
- */
- if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
- WARN(1, "%pf enabled interrupts and double executed\n", func);
- continue;
- }
-
- refs = atomic_dec_return(&data->refs);
- WARN_ON(refs < 0);
-
- if (refs)
- continue;
-
- WARN_ON(!cpumask_empty(data->cpumask));
-
- raw_spin_lock(&call_function.lock);
- list_del_rcu(&data->csd.list);
- raw_spin_unlock(&call_function.lock);
-
- csd_unlock(&data->csd);
- }
-
-}
-
-/*
* Invoked by arch to handle an IPI for call function single. Must be
* called from the arch with interrupts disabled.
*/
@@ -453,8 +370,7 @@ void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
{
struct call_function_data *data;
- unsigned long flags;
- int refs, cpu, next_cpu, this_cpu = smp_processor_id();
+ int cpu, next_cpu, this_cpu = smp_processor_id();
/*
* Can deadlock when called with interrupts disabled.
@@ -486,50 +402,13 @@ void smp_call_function_many(const struct cpumask *mask,
}
data = &__get_cpu_var(cfd_data);
- csd_lock(&data->csd);
-
- /* This BUG_ON verifies our reuse assertions and can be removed */
- BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
-
- /*
- * The global call function queue list add and delete are protected
- * by a lock, but the list is traversed without any lock, relying
- * on the rcu list add and delete to allow safe concurrent traversal.
- * We reuse the call function data without waiting for any grace
- * period after some other cpu removes it from the global queue.
- * This means a cpu might find our data block as it is being
- * filled out.
- *
- * We hold off the interrupt handler on the other cpu by
- * ordering our writes to the cpu mask vs our setting of the
- * refs counter. We assert only the cpu owning the data block
- * will set a bit in cpumask, and each bit will only be cleared
- * by the subject cpu. Each cpu must first find its bit is
- * set and then check that refs is set indicating the element is
- * ready to be processed, otherwise it must skip the entry.
- *
- * On the previous iteration refs was set to 0 by another cpu.
- * To avoid the use of transitivity, set the counter to 0 here
- * so the wmb will pair with the rmb in the interrupt handler.
- */
- atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */
-
- data->csd.func = func;
- data->csd.info = info;
- /* Ensure 0 refs is visible before mask. Also orders func and info */
- smp_wmb();
-
- /* We rely on the "and" being processed before the store */
cpumask_and(data->cpumask, mask, cpu_online_mask);
cpumask_clear_cpu(this_cpu, data->cpumask);
- refs = cpumask_weight(data->cpumask);
/* Some callers race with other cpus changing the passed mask */
- if (unlikely(!refs)) {
- csd_unlock(&data->csd);
+ if (unlikely(!cpumask_weight(data->cpumask)))
return;
- }
/*
* After we put an entry into the list, data->cpumask
@@ -537,34 +416,32 @@ void smp_call_function_many(const struct cpumask *mask,
* a SMP function call, so data->cpumask will be zero.
*/
cpumask_copy(data->cpumask_ipi, data->cpumask);
- raw_spin_lock_irqsave(&call_function.lock, flags);
- /*
- * Place entry at the _HEAD_ of the list, so that any cpu still
- * observing the entry in generic_smp_call_function_interrupt()
- * will not miss any other list entries:
- */
- list_add_rcu(&data->csd.list, &call_function.queue);
- /*
- * We rely on the wmb() in list_add_rcu to complete our writes
- * to the cpumask before this write to refs, which indicates
- * data is on the list and is ready to be processed.
- */
- atomic_set(&data->refs, refs);
- raw_spin_unlock_irqrestore(&call_function.lock, flags);
- /*
- * Make the list addition visible before sending the ipi.
- * (IPIs must obey or appear to obey normal Linux cache
- * coherency rules -- see comment in generic_exec_single).
- */
- smp_mb();
+ for_each_cpu(cpu, data->cpumask) {
+ struct call_single_data *csd = per_cpu_ptr(data->csd, cpu);
+ struct call_single_queue *dst =
+ &per_cpu(call_single_queue, cpu);
+ unsigned long flags;
+
+ csd_lock(csd);
+ csd->func = func;
+ csd->info = info;
+
+ raw_spin_lock_irqsave(&dst->lock, flags);
+ list_add_tail(&csd->list, &dst->list);
+ raw_spin_unlock_irqrestore(&dst->lock, flags);
+ }
/* Send a message to all CPUs in the map */
arch_send_call_function_ipi_mask(data->cpumask_ipi);
- /* Optionally wait for the CPUs to complete */
- if (wait)
- csd_lock_wait(&data->csd);
+ if (wait) {
+ for_each_cpu(cpu, data->cpumask) {
+ struct call_single_data *csd =
+ per_cpu_ptr(data->csd, cpu);
+ csd_lock_wait(csd);
+ }
+ }
}
EXPORT_SYMBOL(smp_call_function_many);
diff --git a/kernel/sys.c b/kernel/sys.c
index 265b37690421..2e18d33ca775 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -47,6 +47,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/binfmts.h>
#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
@@ -433,11 +434,12 @@ static DEFINE_MUTEX(reboot_mutex);
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
void __user *, arg)
{
+ struct pid_namespace *pid_ns = task_active_pid_ns(current);
char buffer[256];
int ret = 0;
/* We only trust the superuser with rebooting the system. */
- if (!capable(CAP_SYS_BOOT))
+ if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
return -EPERM;
/* For safety, we require "magic" arguments. */
@@ -453,7 +455,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
* pid_namespace, the command is handled by reboot_pid_ns() which will
* call do_exit().
*/
- ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
+ ret = reboot_pid_ns(pid_ns, cmd);
if (ret)
return ret;
@@ -2012,160 +2014,159 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = 0;
switch (option) {
- case PR_SET_PDEATHSIG:
- if (!valid_signal(arg2)) {
- error = -EINVAL;
- break;
- }
- me->pdeath_signal = arg2;
- break;
- case PR_GET_PDEATHSIG:
- error = put_user(me->pdeath_signal, (int __user *)arg2);
- break;
- case PR_GET_DUMPABLE:
- error = get_dumpable(me->mm);
+ case PR_SET_PDEATHSIG:
+ if (!valid_signal(arg2)) {
+ error = -EINVAL;
break;
- case PR_SET_DUMPABLE:
- if (arg2 < 0 || arg2 > 1) {
- error = -EINVAL;
- break;
- }
- set_dumpable(me->mm, arg2);
+ }
+ me->pdeath_signal = arg2;
+ break;
+ case PR_GET_PDEATHSIG:
+ error = put_user(me->pdeath_signal, (int __user *)arg2);
+ break;
+ case PR_GET_DUMPABLE:
+ error = get_dumpable(me->mm);
+ break;
+ case PR_SET_DUMPABLE:
+ if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
+ error = -EINVAL;
break;
+ }
+ set_dumpable(me->mm, arg2);
+ break;
- case PR_SET_UNALIGN:
- error = SET_UNALIGN_CTL(me, arg2);
- break;
- case PR_GET_UNALIGN:
- error = GET_UNALIGN_CTL(me, arg2);
- break;
- case PR_SET_FPEMU:
- error = SET_FPEMU_CTL(me, arg2);
- break;
- case PR_GET_FPEMU:
- error = GET_FPEMU_CTL(me, arg2);
- break;
- case PR_SET_FPEXC:
- error = SET_FPEXC_CTL(me, arg2);
- break;
- case PR_GET_FPEXC:
- error = GET_FPEXC_CTL(me, arg2);
- break;
- case PR_GET_TIMING:
- error = PR_TIMING_STATISTICAL;
- break;
- case PR_SET_TIMING:
- if (arg2 != PR_TIMING_STATISTICAL)
- error = -EINVAL;
- break;
- case PR_SET_NAME:
- comm[sizeof(me->comm)-1] = 0;
- if (strncpy_from_user(comm, (char __user *)arg2,
- sizeof(me->comm) - 1) < 0)
- return -EFAULT;
- set_task_comm(me, comm);
- proc_comm_connector(me);
- break;
- case PR_GET_NAME:
- get_task_comm(comm, me);
- if (copy_to_user((char __user *)arg2, comm,
- sizeof(comm)))
- return -EFAULT;
- break;
- case PR_GET_ENDIAN:
- error = GET_ENDIAN(me, arg2);
- break;
- case PR_SET_ENDIAN:
- error = SET_ENDIAN(me, arg2);
- break;
- case PR_GET_SECCOMP:
- error = prctl_get_seccomp();
- break;
- case PR_SET_SECCOMP:
- error = prctl_set_seccomp(arg2, (char __user *)arg3);
- break;
- case PR_GET_TSC:
- error = GET_TSC_CTL(arg2);
- break;
- case PR_SET_TSC:
- error = SET_TSC_CTL(arg2);
- break;
- case PR_TASK_PERF_EVENTS_DISABLE:
- error = perf_event_task_disable();
- break;
- case PR_TASK_PERF_EVENTS_ENABLE:
- error = perf_event_task_enable();
- break;
- case PR_GET_TIMERSLACK:
- error = current->timer_slack_ns;
- break;
- case PR_SET_TIMERSLACK:
- if (arg2 <= 0)
- current->timer_slack_ns =
+ case PR_SET_UNALIGN:
+ error = SET_UNALIGN_CTL(me, arg2);
+ break;
+ case PR_GET_UNALIGN:
+ error = GET_UNALIGN_CTL(me, arg2);
+ break;
+ case PR_SET_FPEMU:
+ error = SET_FPEMU_CTL(me, arg2);
+ break;
+ case PR_GET_FPEMU:
+ error = GET_FPEMU_CTL(me, arg2);
+ break;
+ case PR_SET_FPEXC:
+ error = SET_FPEXC_CTL(me, arg2);
+ break;
+ case PR_GET_FPEXC:
+ error = GET_FPEXC_CTL(me, arg2);
+ break;
+ case PR_GET_TIMING:
+ error = PR_TIMING_STATISTICAL;
+ break;
+ case PR_SET_TIMING:
+ if (arg2 != PR_TIMING_STATISTICAL)
+ error = -EINVAL;
+ break;
+ case PR_SET_NAME:
+ comm[sizeof(me->comm) - 1] = 0;
+ if (strncpy_from_user(comm, (char __user *)arg2,
+ sizeof(me->comm) - 1) < 0)
+ return -EFAULT;
+ set_task_comm(me, comm);
+ proc_comm_connector(me);
+ break;
+ case PR_GET_NAME:
+ get_task_comm(comm, me);
+ if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
+ return -EFAULT;
+ break;
+ case PR_GET_ENDIAN:
+ error = GET_ENDIAN(me, arg2);
+ break;
+ case PR_SET_ENDIAN:
+ error = SET_ENDIAN(me, arg2);
+ break;
+ case PR_GET_SECCOMP:
+ error = prctl_get_seccomp();
+ break;
+ case PR_SET_SECCOMP:
+ error = prctl_set_seccomp(arg2, (char __user *)arg3);
+ break;
+ case PR_GET_TSC:
+ error = GET_TSC_CTL(arg2);
+ break;
+ case PR_SET_TSC:
+ error = SET_TSC_CTL(arg2);
+ break;
+ case PR_TASK_PERF_EVENTS_DISABLE:
+ error = perf_event_task_disable();
+ break;
+ case PR_TASK_PERF_EVENTS_ENABLE:
+ error = perf_event_task_enable();
+ break;
+ case PR_GET_TIMERSLACK:
+ error = current->timer_slack_ns;
+ break;
+ case PR_SET_TIMERSLACK:
+ if (arg2 <= 0)
+ current->timer_slack_ns =
current->default_timer_slack_ns;
- else
- current->timer_slack_ns = arg2;
- break;
- case PR_MCE_KILL:
- if (arg4 | arg5)
- return -EINVAL;
- switch (arg2) {
- case PR_MCE_KILL_CLEAR:
- if (arg3 != 0)
- return -EINVAL;
- current->flags &= ~PF_MCE_PROCESS;
- break;
- case PR_MCE_KILL_SET:
- current->flags |= PF_MCE_PROCESS;
- if (arg3 == PR_MCE_KILL_EARLY)
- current->flags |= PF_MCE_EARLY;
- else if (arg3 == PR_MCE_KILL_LATE)
- current->flags &= ~PF_MCE_EARLY;
- else if (arg3 == PR_MCE_KILL_DEFAULT)
- current->flags &=
- ~(PF_MCE_EARLY|PF_MCE_PROCESS);
- else
- return -EINVAL;
- break;
- default:
+ else
+ current->timer_slack_ns = arg2;
+ break;
+ case PR_MCE_KILL:
+ if (arg4 | arg5)
+ return -EINVAL;
+ switch (arg2) {
+ case PR_MCE_KILL_CLEAR:
+ if (arg3 != 0)
return -EINVAL;
- }
+ current->flags &= ~PF_MCE_PROCESS;
break;
- case PR_MCE_KILL_GET:
- if (arg2 | arg3 | arg4 | arg5)
- return -EINVAL;
- if (current->flags & PF_MCE_PROCESS)
- error = (current->flags & PF_MCE_EARLY) ?
- PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
+ case PR_MCE_KILL_SET:
+ current->flags |= PF_MCE_PROCESS;
+ if (arg3 == PR_MCE_KILL_EARLY)
+ current->flags |= PF_MCE_EARLY;
+ else if (arg3 == PR_MCE_KILL_LATE)
+ current->flags &= ~PF_MCE_EARLY;
+ else if (arg3 == PR_MCE_KILL_DEFAULT)
+ current->flags &=
+ ~(PF_MCE_EARLY|PF_MCE_PROCESS);
else
- error = PR_MCE_KILL_DEFAULT;
- break;
- case PR_SET_MM:
- error = prctl_set_mm(arg2, arg3, arg4, arg5);
- break;
- case PR_GET_TID_ADDRESS:
- error = prctl_get_tid_address(me, (int __user **)arg2);
- break;
- case PR_SET_CHILD_SUBREAPER:
- me->signal->is_child_subreaper = !!arg2;
- break;
- case PR_GET_CHILD_SUBREAPER:
- error = put_user(me->signal->is_child_subreaper,
- (int __user *) arg2);
- break;
- case PR_SET_NO_NEW_PRIVS:
- if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL;
-
- current->no_new_privs = 1;
break;
- case PR_GET_NO_NEW_PRIVS:
- if (arg2 || arg3 || arg4 || arg5)
- return -EINVAL;
- return current->no_new_privs ? 1 : 0;
default:
- error = -EINVAL;
- break;
+ return -EINVAL;
+ }
+ break;
+ case PR_MCE_KILL_GET:
+ if (arg2 | arg3 | arg4 | arg5)
+ return -EINVAL;
+ if (current->flags & PF_MCE_PROCESS)
+ error = (current->flags & PF_MCE_EARLY) ?
+ PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
+ else
+ error = PR_MCE_KILL_DEFAULT;
+ break;
+ case PR_SET_MM:
+ error = prctl_set_mm(arg2, arg3, arg4, arg5);
+ break;
+ case PR_GET_TID_ADDRESS:
+ error = prctl_get_tid_address(me, (int __user **)arg2);
+ break;
+ case PR_SET_CHILD_SUBREAPER:
+ me->signal->is_child_subreaper = !!arg2;
+ break;
+ case PR_GET_CHILD_SUBREAPER:
+ error = put_user(me->signal->is_child_subreaper,
+ (int __user *)arg2);
+ break;
+ case PR_SET_NO_NEW_PRIVS:
+ if (arg2 != 1 || arg3 || arg4 || arg5)
+ return -EINVAL;
+
+ current->no_new_privs = 1;
+ break;
+ case PR_GET_NO_NEW_PRIVS:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ return current->no_new_privs ? 1 : 0;
+ default:
+ error = -EINVAL;
+ break;
}
return error;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4fc9be955c71..d8df00e69c14 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,7 +105,6 @@ extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
-extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
@@ -162,10 +161,13 @@ extern int unaligned_enabled;
#endif
#ifdef CONFIG_IA64
-extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+extern int no_unaligned_warning;
+#endif
+
#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -919,7 +921,7 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
#endif
-#ifdef CONFIG_IA64
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
{
.procname = "ignore-unaligned-usertrap",
.data = &no_unaligned_warning,
@@ -927,6 +929,8 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
+#ifdef CONFIG_IA64
{
.procname = "unaligned-dump-stack",
.data = &unaligned_dump_stack,
@@ -2014,7 +2018,7 @@ static int proc_taint(struct ctl_table *table, int write,
int i;
for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
if ((tmptaint >> i) & 1)
- add_taint(i);
+ add_taint(i, LOCKDEP_STILL_OK);
}
}
diff --git a/kernel/time.c b/kernel/time.c
index c2a27dd93142..f8342a41efa6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -240,7 +240,7 @@ EXPORT_SYMBOL(current_fs_time);
* Avoid unnecessary multiplications/divisions in the
* two most common HZ cases:
*/
-inline unsigned int jiffies_to_msecs(const unsigned long j)
+unsigned int jiffies_to_msecs(const unsigned long j)
{
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
return (MSEC_PER_SEC / HZ) * j;
@@ -256,7 +256,7 @@ inline unsigned int jiffies_to_msecs(const unsigned long j)
}
EXPORT_SYMBOL(jiffies_to_msecs);
-inline unsigned int jiffies_to_usecs(const unsigned long j)
+unsigned int jiffies_to_usecs(const unsigned long j)
{
#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
return (USEC_PER_SEC / HZ) * j;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 30b6de0d977c..c6d6400ee137 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -339,6 +339,7 @@ void clockevents_config_and_register(struct clock_event_device *dev,
clockevents_config(dev, freq);
clockevents_register_device(dev);
}
+EXPORT_SYMBOL_GPL(clockevents_config_and_register);
/**
* clockevents_update_freq - Update frequency and reprogram a clock event device.
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b10a42bb0165..072bb066bb7d 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -23,7 +23,7 @@
* NTP timekeeping variables:
*/
-DEFINE_SPINLOCK(ntp_lock);
+DEFINE_RAW_SPINLOCK(ntp_lock);
/* USER_HZ period (usecs): */
@@ -348,7 +348,7 @@ void ntp_clear(void)
{
unsigned long flags;
- spin_lock_irqsave(&ntp_lock, flags);
+ raw_spin_lock_irqsave(&ntp_lock, flags);
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
@@ -362,7 +362,7 @@ void ntp_clear(void)
/* Clear PPS state variables */
pps_clear();
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
}
@@ -372,9 +372,9 @@ u64 ntp_tick_length(void)
unsigned long flags;
s64 ret;
- spin_lock_irqsave(&ntp_lock, flags);
+ raw_spin_lock_irqsave(&ntp_lock, flags);
ret = tick_length;
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
return ret;
}
@@ -395,7 +395,7 @@ int second_overflow(unsigned long secs)
int leap = 0;
unsigned long flags;
- spin_lock_irqsave(&ntp_lock, flags);
+ raw_spin_lock_irqsave(&ntp_lock, flags);
/*
* Leap second processing. If in leap-insert state at the end of the
@@ -479,7 +479,7 @@ int second_overflow(unsigned long secs)
time_adjust = 0;
out:
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
return leap;
}
@@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc)
getnstimeofday(&ts);
- spin_lock_irq(&ntp_lock);
+ raw_spin_lock_irq(&ntp_lock);
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;
@@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc)
/* fill PPS status fields */
pps_fill_timex(txc);
- spin_unlock_irq(&ntp_lock);
+ raw_spin_unlock_irq(&ntp_lock);
txc->time.tv_sec = ts.tv_sec;
txc->time.tv_usec = ts.tv_nsec;
@@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
pts_norm = pps_normalize_ts(*phase_ts);
- spin_lock_irqsave(&ntp_lock, flags);
+ raw_spin_lock_irqsave(&ntp_lock, flags);
/* clear the error bits, they will be set again if needed */
time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
* just start the frequency interval */
if (unlikely(pps_fbase.tv_sec == 0)) {
pps_fbase = *raw_ts;
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
return;
}
@@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
pr_err("hardpps: PPSJITTER: bad pulse\n");
return;
}
@@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
hardpps_update_phase(pts_norm.nsec);
- spin_unlock_irqrestore(&ntp_lock, flags);
+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
}
EXPORT_SYMBOL(hardpps);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 1e35515a875e..9a0bc98fbe1d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -138,6 +138,20 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
}
/* Timekeeper helper functions. */
+
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+u32 (*arch_gettimeoffset)(void);
+
+u32 get_arch_timeoffset(void)
+{
+ if (likely(arch_gettimeoffset))
+ return arch_gettimeoffset();
+ return 0;
+}
+#else
+static inline u32 get_arch_timeoffset(void) { return 0; }
+#endif
+
static inline s64 timekeeping_get_ns(struct timekeeper *tk)
{
cycle_t cycle_now, cycle_delta;
@@ -154,8 +168,8 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk)
nsec = cycle_delta * tk->mult + tk->xtime_nsec;
nsec >>= tk->shift;
- /* If arch requires, add in gettimeoffset() */
- return nsec + arch_gettimeoffset();
+ /* If arch requires, add in get_arch_timeoffset() */
+ return nsec + get_arch_timeoffset();
}
static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
@@ -174,8 +188,8 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
/* convert delta to nanoseconds. */
nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
- /* If arch requires, add in gettimeoffset() */
- return nsec + arch_gettimeoffset();
+ /* If arch requires, add in get_arch_timeoffset() */
+ return nsec + get_arch_timeoffset();
}
static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
@@ -257,8 +271,8 @@ static void timekeeping_forward_now(struct timekeeper *tk)
tk->xtime_nsec += cycle_delta * tk->mult;
- /* If arch requires, add in gettimeoffset() */
- tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
+ /* If arch requires, add in get_arch_timeoffset() */
+ tk->xtime_nsec += (u64)get_arch_timeoffset() << tk->shift;
tk_normalize_xtime(tk);
diff --git a/kernel/user.c b/kernel/user.c
index 33acb5e53a5f..57ebfd42023c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -47,9 +47,7 @@ struct user_namespace init_user_ns = {
.count = 4294967295U,
},
},
- .kref = {
- .refcount = ATOMIC_INIT(3),
- },
+ .count = ATOMIC_INIT(3),
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 2b042c42fbc4..8b650837083e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,7 +78,7 @@ int create_user_ns(struct cred *new)
return ret;
}
- kref_init(&ns->kref);
+ atomic_set(&ns->count, 1);
/* Leave the new->user_ns reference with the new user namespace. */
ns->parent = parent_ns;
ns->owner = owner;
@@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
return create_user_ns(cred);
}
-void free_user_ns(struct kref *kref)
+void free_user_ns(struct user_namespace *ns)
{
- struct user_namespace *parent, *ns =
- container_of(kref, struct user_namespace, kref);
+ struct user_namespace *parent;
- parent = ns->parent;
- proc_free_inum(ns->proc_inum);
- kmem_cache_free(user_ns_cachep, ns);
- put_user_ns(parent);
+ do {
+ parent = ns->parent;
+ proc_free_inum(ns->proc_inum);
+ kmem_cache_free(user_ns_cachep, ns);
+ ns = parent;
+ } while (atomic_dec_and_test(&parent->count));
}
EXPORT_SYMBOL(free_user_ns);
@@ -519,6 +520,42 @@ struct seq_operations proc_projid_seq_operations = {
.show = projid_m_show,
};
+static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent)
+{
+ u32 upper_first, lower_first, upper_last, lower_last;
+ unsigned idx;
+
+ upper_first = extent->first;
+ lower_first = extent->lower_first;
+ upper_last = upper_first + extent->count - 1;
+ lower_last = lower_first + extent->count - 1;
+
+ for (idx = 0; idx < new_map->nr_extents; idx++) {
+ u32 prev_upper_first, prev_lower_first;
+ u32 prev_upper_last, prev_lower_last;
+ struct uid_gid_extent *prev;
+
+ prev = &new_map->extent[idx];
+
+ prev_upper_first = prev->first;
+ prev_lower_first = prev->lower_first;
+ prev_upper_last = prev_upper_first + prev->count - 1;
+ prev_lower_last = prev_lower_first + prev->count - 1;
+
+ /* Does the upper range intersect a previous extent? */
+ if ((prev_upper_first <= upper_last) &&
+ (prev_upper_last >= upper_first))
+ return true;
+
+ /* Does the lower range intersect a previous extent? */
+ if ((prev_lower_first <= lower_last) &&
+ (prev_lower_last >= lower_first))
+ return true;
+ }
+ return false;
+}
+
+
static DEFINE_MUTEX(id_map_mutex);
static ssize_t map_write(struct file *file, const char __user *buf,
@@ -531,7 +568,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
struct user_namespace *ns = seq->private;
struct uid_gid_map new_map;
unsigned idx;
- struct uid_gid_extent *extent, *last = NULL;
+ struct uid_gid_extent *extent = NULL;
unsigned long page = 0;
char *kbuf, *pos, *next_line;
ssize_t ret = -EINVAL;
@@ -634,14 +671,11 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if ((extent->lower_first + extent->count) <= extent->lower_first)
goto out;
- /* For now only accept extents that are strictly in order */
- if (last &&
- (((last->first + last->count) > extent->first) ||
- ((last->lower_first + last->count) > extent->lower_first)))
+ /* Do the ranges in extent overlap any previous extents? */
+ if (mappings_overlap(&new_map, extent))
goto out;
new_map.nr_extents++;
- last = extent;
/* Fail if the file contains too many extents */
if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 27689422aa92..4a944676358e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -113,9 +113,9 @@ static int get_softlockup_thresh(void)
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
{
- return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
+ return local_clock() >> 30LL; /* 2^30 ~= 10^9 */
}
static void set_sample_period(void)
@@ -133,9 +133,7 @@ static void set_sample_period(void)
/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
- int this_cpu = smp_processor_id();
-
- __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
+ __this_cpu_write(watchdog_touch_ts, get_timestamp());
}
void touch_softlockup_watchdog(void)
@@ -196,7 +194,7 @@ static int is_hardlockup(void)
static int is_softlockup(unsigned long touch_ts)
{
- unsigned long now = get_timestamp(smp_processor_id());
+ unsigned long now = get_timestamp();
/* Warn about unreasonable delays: */
if (time_after(now, touch_ts + get_softlockup_thresh()))