diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/audit.c | 3 | ||||
| -rw-r--r-- | kernel/capability.c | 20 | ||||
| -rw-r--r-- | kernel/cpuset.c | 107 | ||||
| -rw-r--r-- | kernel/crash_dump.c | 11 | ||||
| -rw-r--r-- | kernel/exit.c | 4 | ||||
| -rw-r--r-- | kernel/fork.c | 7 | ||||
| -rw-r--r-- | kernel/itimer.c | 37 | ||||
| -rw-r--r-- | kernel/module.c | 15 | ||||
| -rw-r--r-- | kernel/panic.c | 9 | ||||
| -rw-r--r-- | kernel/posix-timers.c | 18 | ||||
| -rw-r--r-- | kernel/power/Kconfig | 12 | ||||
| -rw-r--r-- | kernel/power/disk.c | 66 | ||||
| -rw-r--r-- | kernel/power/main.c | 21 | ||||
| -rw-r--r-- | kernel/power/poweroff.c | 4 | ||||
| -rw-r--r-- | kernel/power/process.c | 29 | ||||
| -rw-r--r-- | kernel/power/smp.c | 2 | ||||
| -rw-r--r-- | kernel/power/swsusp.c | 204 | ||||
| -rw-r--r-- | kernel/sched.c | 12 | ||||
| -rw-r--r-- | kernel/signal.c | 2 | ||||
| -rw-r--r-- | kernel/sys.c | 109 | ||||
| -rw-r--r-- | kernel/sys_ni.c | 4 | ||||
| -rw-r--r-- | kernel/sysctl.c | 30 | ||||
| -rw-r--r-- | kernel/time.c | 2 | ||||
| -rw-r--r-- | kernel/timer.c | 2 | ||||
| -rw-r--r-- | kernel/user.c | 4 | ||||
| -rw-r--r-- | kernel/workqueue.c | 2 |
26 files changed, 522 insertions, 214 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc29..7f0699790d46 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/kernel/capability.c b/kernel/capability.c index 64db1ee820c2..8986a37a67ea 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -31,8 +31,14 @@ static DEFINE_SPINLOCK(task_capability_lock); * uninteresting and/or not to be changed. */ -/* +/** * sys_capget - get the capabilities of a given process. + * @header: pointer to struct that contains capability version and + * target pid data + * @dataptr: pointer to struct that contains the effective, permitted, + * and inheritable capabilities that are returned + * + * Returns 0 on success and < 0 on error. */ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) { @@ -141,8 +147,14 @@ static inline int cap_set_all(kernel_cap_t *effective, return ret; } -/* - * sys_capset - set capabilities for a given process, all processes, or all +/** + * sys_capset - set capabilities for a process or a group of processes + * @header: pointer to struct that contains capability version and + * target pid data + * @data: pointer to struct that contains the effective, permitted, + * and inheritable capabilities + * + * Set capabilities for a given process, all processes, or all * processes in a given process group. * * The restrictions on setting capabilities are specified as: @@ -152,6 +164,8 @@ static inline int cap_set_all(kernel_cap_t *effective, * I: any raised capabilities must be a subset of the (old current) permitted * P: any raised capabilities must be a subset of the (old current) permitted * E: must be set to a subset of (new target) permitted + * + * Returns 0 on success and < 0 on error. */ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 984c0bf3807f..8ab1b4e518b8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen) * to continue to serve a useful existence. Next time it's released, * we will get notified again, if it still has 'notify_on_release' set. * - * Note final arg to call_usermodehelper() is 0 - that means - * don't wait. Since we are holding the global cpuset_sem here, - * and we are asking another thread (started from keventd) to rmdir a - * cpuset, we can't wait - or we'd deadlock with the removing thread - * on cpuset_sem. + * The final arg to call_usermodehelper() is 0, which means don't + * wait. The separate /sbin/cpuset_release_agent task is forked by + * call_usermodehelper(), then control in this thread returns here, + * without waiting for the release agent task. We don't bother to + * wait because the caller of this routine has no use for the exit + * status of the /sbin/cpuset_release_agent task, so no sense holding + * our caller up for that. + * + * The simple act of forking that task might require more memory, + * which might need cpuset_sem. So this routine must be called while + * cpuset_sem is not held, to avoid a possible deadlock. See also + * comments for check_for_release(), below. */ -static int cpuset_release_agent(char *cpuset_str) +static void cpuset_release_agent(const char *pathbuf) { char *argv[3], *envp[3]; int i; + if (!pathbuf) + return; + i = 0; argv[i++] = "/sbin/cpuset_release_agent"; - argv[i++] = cpuset_str; + argv[i++] = (char *)pathbuf; argv[i] = NULL; i = 0; @@ -421,17 +431,29 @@ static int cpuset_release_agent(char *cpuset_str) envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[i] = NULL; - return call_usermodehelper(argv[0], argv, envp, 0); + call_usermodehelper(argv[0], argv, envp, 0); + kfree(pathbuf); } /* * Either cs->count of using tasks transitioned to zero, or the * cs->children list of child cpusets just became empty. If this * cs is notify_on_release() and now both the user count is zero and - * the list of children is empty, send notice to user land. + * the list of children is empty, prepare cpuset path in a kmalloc'd + * buffer, to be returned via ppathbuf, so that the caller can invoke + * cpuset_release_agent() with it later on, once cpuset_sem is dropped. + * Call here with cpuset_sem held. + * + * This check_for_release() routine is responsible for kmalloc'ing + * pathbuf. The above cpuset_release_agent() is responsible for + * kfree'ing pathbuf. The caller of these routines is responsible + * for providing a pathbuf pointer, initialized to NULL, then + * calling check_for_release() with cpuset_sem held and the address + * of the pathbuf pointer, then dropping cpuset_sem, then calling + * cpuset_release_agent() with pathbuf, as set by check_for_release(). */ -static void check_for_release(struct cpuset *cs) +static void check_for_release(struct cpuset *cs, char **ppathbuf) { if (notify_on_release(cs) && atomic_read(&cs->count) == 0 && list_empty(&cs->children)) { @@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs) if (!buf) return; if (cpuset_path(cs, buf, PAGE_SIZE) < 0) - goto out; - cpuset_release_agent(buf); -out: - kfree(buf); + kfree(buf); + else + *ppathbuf = buf; } } @@ -606,6 +627,14 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) * Call with cpuset_sem held. May nest a call to the * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. */ + +/* + * Hack to avoid 2.6.13 partial node dynamic sched domain bug. + * Disable letting 'cpu_exclusive' cpusets define dynamic sched + * domains, until the sched domain can handle partial nodes. + * Remove this #if hackery when sched domains fixed. + */ +#if 0 static void update_cpu_domains(struct cpuset *cur) { struct cpuset *c, *par = cur->parent; @@ -646,6 +675,11 @@ static void update_cpu_domains(struct cpuset *cur) partition_sched_domains(&pspan, &cspan); unlock_cpu_hotplug(); } +#else +static void update_cpu_domains(struct cpuset *cur) +{ +} +#endif static int update_cpumask(struct cpuset *cs, char *buf) { @@ -727,14 +761,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) return 0; } -static int attach_task(struct cpuset *cs, char *buf) +static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) { pid_t pid; struct task_struct *tsk; struct cpuset *oldcs; cpumask_t cpus; - if (sscanf(buf, "%d", &pid) != 1) + if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) return -ENOSPC; @@ -777,7 +811,7 @@ static int attach_task(struct cpuset *cs, char *buf) put_task_struct(tsk); if (atomic_dec_and_test(&oldcs->count)) - check_for_release(oldcs); + check_for_release(oldcs, ppathbuf); return 0; } @@ -801,6 +835,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us struct cftype *cft = __d_cft(file->f_dentry); cpuset_filetype_t type = cft->private; char *buffer; + char *pathbuf = NULL; int retval = 0; /* Crude upper limit on largest legitimate cpulist user might write. */ @@ -841,7 +876,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); break; case FILE_TASKLIST: - retval = attach_task(cs, buffer); + retval = attach_task(cs, buffer, &pathbuf); break; default: retval = -EINVAL; @@ -852,6 +887,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us retval = nbytes; out2: up(&cpuset_sem); + cpuset_release_agent(pathbuf); out1: kfree(buffer); return retval; @@ -1357,6 +1393,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) struct cpuset *cs = dentry->d_fsdata; struct dentry *d; struct cpuset *parent; + char *pathbuf = NULL; /* the vfs holds both inode->i_sem already */ @@ -1376,7 +1413,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) update_cpu_domains(cs); list_del(&cs->sibling); /* delete my sibling from parent->children */ if (list_empty(&parent->children)) - check_for_release(parent); + check_for_release(parent, &pathbuf); spin_lock(&cs->dentry->d_lock); d = dget(cs->dentry); cs->dentry = NULL; @@ -1384,6 +1421,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) cpuset_d_remove_dir(d); dput(d); up(&cpuset_sem); + cpuset_release_agent(pathbuf); return 0; } @@ -1440,10 +1478,10 @@ void __init cpuset_init_smp(void) /** * cpuset_fork - attach newly forked task to its parents cpuset. - * @p: pointer to task_struct of forking parent process. + * @tsk: pointer to task_struct of forking parent process. * * Description: By default, on fork, a task inherits its - * parents cpuset. The pointer to the shared cpuset is + * parent's cpuset. The pointer to the shared cpuset is * automatically copied in fork.c by dup_task_struct(). * This cpuset_fork() routine need only increment the usage * counter in that cpuset. @@ -1471,7 +1509,6 @@ void cpuset_fork(struct task_struct *tsk) * by the cpuset_sem semaphore. If you don't hold cpuset_sem, * then a zero cpuset use count is a license to any other task to * nuke the cpuset immediately. - * **/ void cpuset_exit(struct task_struct *tsk) @@ -1484,10 +1521,13 @@ void cpuset_exit(struct task_struct *tsk) task_unlock(tsk); if (notify_on_release(cs)) { + char *pathbuf = NULL; + down(&cpuset_sem); if (atomic_dec_and_test(&cs->count)) - check_for_release(cs); + check_for_release(cs, &pathbuf); up(&cpuset_sem); + cpuset_release_agent(pathbuf); } else { atomic_dec(&cs->count); } @@ -1521,7 +1561,9 @@ void cpuset_init_current_mems_allowed(void) current->mems_allowed = NODE_MASK_ALL; } -/* +/** + * cpuset_update_current_mems_allowed - update mems parameters to new values + * * If the current tasks cpusets mems_allowed changed behind our backs, * update current->mems_allowed and mems_generation to the new value. * Do not call this routine if in_interrupt(). @@ -1540,13 +1582,20 @@ void cpuset_update_current_mems_allowed(void) } } +/** + * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed + * @nodes: pointer to a node bitmap that is and-ed with mems_allowed + */ void cpuset_restrict_to_mems_allowed(unsigned long *nodes) { bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed), MAX_NUMNODES); } -/* +/** + * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed + * @zl: the zonelist to be checked + * * Are any of the nodes on zonelist zl allowed in current->mems_allowed? */ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) @@ -1562,8 +1611,12 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 0; } -/* - * Is 'current' valid, and is zone z allowed in current->mems_allowed? +/** + * cpuset_zone_allowed - is zone z allowed in current->mems_allowed + * @z: zone in question + * + * Is zone z allowed in current->mems_allowed, or is + * the CPU in interrupt context? (zone is always allowed in this case) */ int cpuset_zone_allowed(struct zone *z) { diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 459ba49e376a..334c37f5218a 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c @@ -18,7 +18,16 @@ /* Stores the physical address of elf header of crash image. */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; -/* +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * * Copy a page from "oldmem". For this page, there is no pte mapped * in the current kernel. We stitch up a pte, similar to kmap_atomic. */ diff --git a/kernel/exit.c b/kernel/exit.c index 9d1b10ed0135..5b0fb9f09f21 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -829,8 +829,10 @@ fastcall NORET_TYPE void do_exit(long code) acct_update_integrals(tsk); update_mem_hiwater(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); - if (group_dead) + if (group_dead) { + del_timer_sync(&tsk->signal->real_timer); acct_process(code); + } exit_mm(tsk); exit_sem(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index cdef6cea8900..7e1ead9a6ba4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -208,8 +208,10 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) struct file *file; if (mpnt->vm_flags & VM_DONTCOPY) { + long pages = vma_pages(mpnt); + mm->total_vm -= pages; __vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, - -vma_pages(mpnt)); + -pages); continue; } charge = 0; @@ -992,6 +994,9 @@ static task_t *copy_process(unsigned long clone_flags, * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); +#endif /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ diff --git a/kernel/itimer.c b/kernel/itimer.c index a72cb0e5aa4b..7c1b25e25e47 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -112,28 +112,11 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) return error; } -/* - * Called with P->sighand->siglock held and P->signal->real_timer inactive. - * If interval is nonzero, arm the timer for interval ticks from now. - */ -static inline void it_real_arm(struct task_struct *p, unsigned long interval) -{ - p->signal->it_real_value = interval; /* XXX unnecessary field?? */ - if (interval == 0) - return; - if (interval > (unsigned long) LONG_MAX) - interval = LONG_MAX; - /* the "+ 1" below makes sure that the timer doesn't go off before - * the interval requested. This could happen if - * time requested % (usecs per jiffy) is more than the usecs left - * in the current jiffy */ - p->signal->real_timer.expires = jiffies + interval + 1; - add_timer(&p->signal->real_timer); -} void it_real_fn(unsigned long __data) { struct task_struct * p = (struct task_struct *) __data; + unsigned long inc = p->signal->it_real_incr; send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); @@ -141,14 +124,23 @@ void it_real_fn(unsigned long __data) * Now restart the timer if necessary. We don't need any locking * here because do_setitimer makes sure we have finished running * before it touches anything. + * Note, we KNOW we are (or should be) at a jiffie edge here so + * we don't need the +1 stuff. Also, we want to use the prior + * expire value so as to not "slip" a jiffie if we are late. + * Deal with requesting a time prior to "now" here rather than + * in add_timer. */ - it_real_arm(p, p->signal->it_real_incr); + if (!inc) + return; + while (time_before_eq(p->signal->real_timer.expires, jiffies)) + p->signal->real_timer.expires += inc; + add_timer(&p->signal->real_timer); } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { struct task_struct *tsk = current; - unsigned long val, interval; + unsigned long val, interval, expires; cputime_t cval, cinterval, nval, ninterval; switch (which) { @@ -164,7 +156,10 @@ again: } tsk->signal->it_real_incr = timeval_to_jiffies(&value->it_interval); - it_real_arm(tsk, timeval_to_jiffies(&value->it_value)); + expires = timeval_to_jiffies(&value->it_value); + if (expires) + mod_timer(&tsk->signal->real_timer, + jiffies + 1 + expires); spin_unlock_irq(&tsk->sighand->siglock); if (ovalue) { jiffies_to_timeval(val, &ovalue->it_value); diff --git a/kernel/module.c b/kernel/module.c index 068e271ab3a5..c32995fbd8fd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -250,13 +250,18 @@ static inline unsigned int block_size(int val) /* Created by linker magic */ extern char __per_cpu_start[], __per_cpu_end[]; -static void *percpu_modalloc(unsigned long size, unsigned long align) +static void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) { unsigned long extra; unsigned int i; void *ptr; - BUG_ON(align > SMP_CACHE_BYTES); + if (align > SMP_CACHE_BYTES) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n", + name, align, SMP_CACHE_BYTES); + align = SMP_CACHE_BYTES; + } ptr = __per_cpu_start; for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { @@ -348,7 +353,8 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align) +static inline void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) { return NULL; } @@ -1644,7 +1650,8 @@ static struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign); + sechdrs[pcpuindex].sh_addralign, + mod->name); if (!percpu) { err = -ENOMEM; goto free_mod; diff --git a/kernel/panic.c b/kernel/panic.c index 74ba5f3e46c7..aabc5f86fa3f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -111,12 +111,11 @@ NORET_TYPE void panic(const char * fmt, ...) mdelay(1); i++; } - /* - * Should we run the reboot notifier. For the moment Im - * choosing not too. It might crash, be corrupt or do - * more harm than good for other reasons. + /* This will not be a clean reboot, with everything + * shutting down. But if there is a chance of + * rebooting the system it will be rebooted. */ - machine_restart(NULL); + emergency_restart(); } #ifdef __sparc__ { diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5b7b4736d82b..38798a2ff994 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -896,21 +896,10 @@ static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, jiffies_64_f = get_jiffies_64(); } /* - * Take away now to get delta + * Take away now to get delta and normalize */ - oc.tv_sec -= now.tv_sec; - oc.tv_nsec -= now.tv_nsec; - /* - * Normalize... - */ - while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { - oc.tv_nsec -= NSEC_PER_SEC; - oc.tv_sec++; - } - while ((oc.tv_nsec) < 0) { - oc.tv_nsec += NSEC_PER_SEC; - oc.tv_sec--; - } + set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec, + oc.tv_nsec - now.tv_nsec); }else{ jiffies_64_f = get_jiffies_64(); } @@ -1177,7 +1166,6 @@ void exit_itimers(struct signal_struct *sig) tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); itimer_delete(tmr); } - del_timer_sync(&sig->real_timer); } /* diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 2c7121d9bff1..917066a5767c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -72,6 +72,18 @@ config PM_STD_PARTITION suspended image to. It will simply pick the first available swap device. +config SWSUSP_ENCRYPT + bool "Encrypt suspend image" + depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y) + default "" + ---help--- + To prevent data gathering from swap after resume you can encrypt + the suspend image with a temporary key that is deleted on + resume. + + Note that the temporary key is stored unencrypted on disk while the + system is suspended. + config SUSPEND_SMP bool depends on HOTPLUG_CPU && X86 && PM diff --git a/kernel/power/disk.c b/kernel/power/disk.c index c51a4d96d4eb..2d8bf054d036 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -16,6 +16,8 @@ #include <linux/device.h> #include <linux/delay.h> #include <linux/fs.h> +#include <linux/mount.h> + #include "power.h" @@ -57,16 +59,13 @@ static void power_down(suspend_disk_method_t mode) error = pm_ops->enter(PM_SUSPEND_DISK); break; case PM_DISK_SHUTDOWN: - printk("Powering off system\n"); - device_shutdown(); - machine_power_off(); + kernel_power_off(); break; case PM_DISK_REBOOT: - device_shutdown(); - machine_restart(NULL); + kernel_restart(NULL); break; } - machine_halt(); + kernel_halt(); /* Valid image is on the disk, if we continue we risk serious data corruption after resume. */ printk(KERN_CRIT "Please power me down manually\n"); @@ -113,24 +112,12 @@ static inline void platform_finish(void) } } -static void finish(void) -{ - device_resume(); - platform_finish(); - thaw_processes(); - enable_nonboot_cpus(); - pm_restore_console(); -} - - static int prepare_processes(void) { int error; pm_prepare_console(); - sys_sync(); - disable_nonboot_cpus(); if (freeze_processes()) { @@ -163,15 +150,6 @@ static void unprepare_processes(void) pm_restore_console(); } -static int prepare_devices(void) -{ - int error; - - if ((error = device_suspend(PMSG_FREEZE))) - printk("Some devices failed to suspend\n"); - return error; -} - /** * pm_suspend_disk - The granpappy of power management. * @@ -188,17 +166,14 @@ int pm_suspend_disk(void) error = prepare_processes(); if (error) return error; - error = prepare_devices(); + error = device_suspend(PMSG_FREEZE); if (error) { + printk("Some devices failed to suspend\n"); unprepare_processes(); return error; } - pr_debug("PM: Attempting to suspend to disk.\n"); - if (pm_disk_mode == PM_DISK_FIRMWARE) - return pm_ops->enter(PM_SUSPEND_DISK); - pr_debug("PM: snapshotting memory.\n"); in_suspend = 1; if ((error = swsusp_suspend())) @@ -209,11 +184,20 @@ int pm_suspend_disk(void) error = swsusp_write(); if (!error) power_down(pm_disk_mode); + else { + /* swsusp_write can not fail in device_resume, + no need to do second device_resume */ + swsusp_free(); + unprepare_processes(); + return error; + } } else pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); Done: - finish(); + device_resume(); + unprepare_processes(); return error; } @@ -234,9 +218,12 @@ static int software_resume(void) { int error; + down(&pm_sem); if (!swsusp_resume_device) { - if (!strlen(resume_file)) + if (!strlen(resume_file)) { + up(&pm_sem); return -ENOENT; + } swsusp_resume_device = name_to_dev_t(resume_file); pr_debug("swsusp: Resume From Partition %s\n", resume_file); } else { @@ -249,6 +236,7 @@ static int software_resume(void) * FIXME: If noresume is specified, we need to find the partition * and reset it back to normal swap space. */ + up(&pm_sem); return 0; } @@ -271,20 +259,24 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); - if ((error = prepare_devices())) + if ((error = device_suspend(PMSG_FREEZE))) { + printk("Some devices failed to suspend\n"); goto Free; + } mb(); pr_debug("PM: Restoring saved image.\n"); swsusp_resume(); pr_debug("PM: Restore failed, recovering.n"); - finish(); + device_resume(); Free: swsusp_free(); Cleanup: unprepare_processes(); Done: + /* For success case, the suspend path will release the lock */ + up(&pm_sem); pr_debug("PM: Resume from disk failed.\n"); return 0; } @@ -391,7 +383,9 @@ static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t if (sscanf(buf, "%u:%u", &maj, &min) == 2) { res = MKDEV(maj,min); if (maj == MAJOR(res) && min == MINOR(res)) { + down(&pm_sem); swsusp_resume_device = res; + up(&pm_sem); printk("Attempting manual resume\n"); noresume = 0; software_resume(); diff --git a/kernel/power/main.c b/kernel/power/main.c index c94cb9e95090..22bdc93cc038 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -19,6 +19,9 @@ #include "power.h" +/*This is just an arbitrary number */ +#define FREE_PAGE_NUMBER (100) + DECLARE_MUTEX(pm_sem); struct pm_ops * pm_ops = NULL; @@ -49,6 +52,7 @@ void pm_set_ops(struct pm_ops * ops) static int suspend_prepare(suspend_state_t state) { int error = 0; + unsigned int free_pages; if (!pm_ops || !pm_ops->enter) return -EPERM; @@ -67,6 +71,16 @@ static int suspend_prepare(suspend_state_t state) goto Thaw; } + if ((free_pages = nr_free_pages()) < FREE_PAGE_NUMBER) { + pr_debug("PM: free some memory\n"); + shrink_all_memory(FREE_PAGE_NUMBER - free_pages); + if (nr_free_pages() < FREE_PAGE_NUMBER) { + error = -ENOMEM; + printk(KERN_ERR "PM: No enough memory\n"); + goto Thaw; + } + } + if (pm_ops->prepare) { if ((error = pm_ops->prepare(state))) goto Thaw; @@ -129,11 +143,12 @@ static void suspend_finish(suspend_state_t state) -static char * pm_states[] = { +static char *pm_states[PM_SUSPEND_MAX] = { [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", +#ifdef CONFIG_SOFTWARE_SUSPEND [PM_SUSPEND_DISK] = "disk", - NULL, +#endif }; @@ -194,7 +209,7 @@ int software_suspend(void) int pm_suspend(suspend_state_t state) { - if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) + if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) return enter_state(state); return -EINVAL; } diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 715081b2d829..7a4144ba3afd 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/workqueue.h> +#include <linux/reboot.h> /* * When the user hits Sys-Rq o to power down the machine this is the @@ -17,8 +18,7 @@ static void do_poweroff(void *dummy) { - if (pm_power_off) - pm_power_off(); + kernel_power_off(); } static DECLARE_WORK(poweroff_work, do_poweroff, NULL); diff --git a/kernel/power/process.c b/kernel/power/process.c index 3bd0d261818f..28de118f7a0b 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -38,7 +38,6 @@ void refrigerator(void) processes around? */ long save; save = current->state; - current->state = TASK_UNINTERRUPTIBLE; pr_debug("%s entered refrigerator\n", current->comm); printk("="); @@ -47,8 +46,10 @@ void refrigerator(void) recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - while (frozen(current)) + while (frozen(current)) { + current->state = TASK_UNINTERRUPTIBLE; schedule(); + } pr_debug("%s left refrigerator\n", current->comm); current->state = save; } @@ -80,13 +81,33 @@ int freeze_processes(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ - if (time_after(jiffies, start_time + TIMEOUT)) { + if (todo && time_after(jiffies, start_time + TIMEOUT)) { printk( "\n" ); printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); - return todo; + break; } } while(todo); + /* This does not unfreeze processes that are already frozen + * (we have slightly ugly calling convention in that respect, + * and caller must call thaw_processes() if something fails), + * but it cleans up leftover PF_FREEZE requests. + */ + if (todo) { + read_lock(&tasklist_lock); + do_each_thread(g, p) + if (freezing(p)) { + pr_debug(" clean up: %s\n", p->comm); + p->flags &= ~PF_FREEZE; + spin_lock_irqsave(&p->sighand->siglock, flags); + recalc_sigpending_tsk(p); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } + while_each_thread(g, p); + read_unlock(&tasklist_lock); + return todo; + } + printk( "|\n" ); BUG_ON(in_atomic()); return 0; diff --git a/kernel/power/smp.c b/kernel/power/smp.c index bbe23079c62c..911fc62b8225 100644 --- a/kernel/power/smp.c +++ b/kernel/power/smp.c @@ -38,7 +38,7 @@ void disable_nonboot_cpus(void) } printk("Error taking cpu %d down: %d\n", cpu, error); } - BUG_ON(smp_processor_id() != 0); + BUG_ON(raw_smp_processor_id() != 0); if (error) panic("cpus not sleeping"); } diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7d7801cd01f0..eaacd5cb5889 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -31,6 +31,9 @@ * Alex Badea <vampire@go.ro>: * Fixed runaway init * + * Andreas Steinmetz <ast@domdv.de>: + * Added encrypted suspend option + * * More state savers are welcome. Especially for the scsi layer... * * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt @@ -63,6 +66,7 @@ #include <linux/console.h> #include <linux/highmem.h> #include <linux/bio.h> +#include <linux/mount.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -70,8 +74,16 @@ #include <asm/tlbflush.h> #include <asm/io.h> +#include <linux/random.h> +#include <linux/crypto.h> +#include <asm/scatterlist.h> + #include "power.h" +#define CIPHER "aes" +#define MAXKEY 32 +#define MAXIV 32 + /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; @@ -102,7 +114,8 @@ static suspend_pagedir_t *pagedir_save; #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; + char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)]; + u8 key_iv[MAXKEY+MAXIV]; swp_entry_t swsusp_info; char orig_sig[10]; char sig[10]; @@ -128,6 +141,131 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; +static int write_page(unsigned long addr, swp_entry_t * loc); +static int bio_read_page(pgoff_t page_off, void * page); + +static u8 key_iv[MAXKEY+MAXIV]; + +#ifdef CONFIG_SWSUSP_ENCRYPT + +static int crypto_init(int mode, void **mem) +{ + int error = 0; + int len; + char *modemsg; + struct crypto_tfm *tfm; + + modemsg = mode ? "suspend not possible" : "resume not possible"; + + tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC); + if(!tfm) { + printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg); + error = -EINVAL; + goto out; + } + + if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) { + printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg); + error = -ENOKEY; + goto fail; + } + + if (mode) + get_random_bytes(key_iv, MAXKEY+MAXIV); + + len = crypto_tfm_alg_max_keysize(tfm); + if (len > MAXKEY) + len = MAXKEY; + + if (crypto_cipher_setkey(tfm, key_iv, len)) { + printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg); + error = -EKEYREJECTED; + goto fail; + } + + len = crypto_tfm_alg_ivsize(tfm); + + if (MAXIV < len) { + printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg); + error = -EOVERFLOW; + goto fail; + } + + crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len); + + *mem=(void *)tfm; + + goto out; + +fail: crypto_free_tfm(tfm); +out: return error; +} + +static __inline__ void crypto_exit(void *mem) +{ + crypto_free_tfm((struct crypto_tfm *)mem); +} + +static __inline__ int crypto_write(struct pbe *p, void *mem) +{ + int error = 0; + struct scatterlist src, dst; + + src.page = virt_to_page(p->address); + src.offset = 0; + src.length = PAGE_SIZE; + dst.page = virt_to_page((void *)&swsusp_header); + dst.offset = 0; + dst.length = PAGE_SIZE; + + error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src, + PAGE_SIZE); + + if (!error) + error = write_page((unsigned long)&swsusp_header, + &(p->swap_address)); + return error; +} + +static __inline__ int crypto_read(struct pbe *p, void *mem) +{ + int error = 0; + struct scatterlist src, dst; + + error = bio_read_page(swp_offset(p->swap_address), (void *)p->address); + if (!error) { + src.offset = 0; + src.length = PAGE_SIZE; + dst.offset = 0; + dst.length = PAGE_SIZE; + src.page = dst.page = virt_to_page((void *)p->address); + + error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst, + &src, PAGE_SIZE); + } + return error; +} +#else +static __inline__ int crypto_init(int mode, void *mem) +{ + return 0; +} + +static __inline__ void crypto_exit(void *mem) +{ +} + +static __inline__ int crypto_write(struct pbe *p, void *mem) +{ + return write_page(p->address, &(p->swap_address)); +} + +static __inline__ int crypto_read(struct pbe *p, void *mem) +{ + return bio_read_page(swp_offset(p->swap_address), (void *)p->address); +} +#endif + static int mark_swapfiles(swp_entry_t prev) { int error; @@ -139,6 +277,7 @@ static int mark_swapfiles(swp_entry_t prev) !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); + memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV); swsusp_header.swsusp_info = prev; error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), @@ -178,9 +317,9 @@ static int swsusp_swap_check(void) /* This is called before saving image */ len=strlen(resume_file); root_swap = 0xFFFF; - swap_list_lock(); + spin_lock(&swap_lock); for (i=0; i<MAX_SWAPFILES; i++) { - if (swap_info[i].flags == 0) { + if (!(swap_info[i].flags & SWP_WRITEOK)) { swapfile_used[i]=SWAPFILE_UNUSED; } else { if (!len) { @@ -201,7 +340,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */ } } } - swap_list_unlock(); + spin_unlock(&swap_lock); return (root_swap != 0xffff) ? 0 : -ENODEV; } @@ -215,12 +354,12 @@ static void lock_swapdevices(void) { int i; - swap_list_lock(); + spin_lock(&swap_lock); for (i = 0; i< MAX_SWAPFILES; i++) if (swapfile_used[i] == SWAPFILE_IGNORED) { - swap_info[i].flags ^= 0xFF; + swap_info[i].flags ^= SWP_WRITEOK; } - swap_list_unlock(); + spin_unlock(&swap_lock); } /** @@ -285,6 +424,10 @@ static int data_write(void) int error = 0, i = 0; unsigned int mod = nr_copy_pages / 100; struct pbe *p; + void *tfm; + + if ((error = crypto_init(1, &tfm))) + return error; if (!mod) mod = 1; @@ -293,11 +436,14 @@ static int data_write(void) for_each_pbe (p, pagedir_nosave) { if (!(i%mod)) printk( "\b\b\b\b%3d%%", i / mod ); - if ((error = write_page(p->address, &(p->swap_address)))) + if ((error = crypto_write(p, tfm))) { + crypto_exit(tfm); return error; + } i++; } printk("\b\b\b\bdone\n"); + crypto_exit(tfm); return error; } @@ -384,7 +530,6 @@ static int write_pagedir(void) * write_suspend_image - Write entire image and metadata. * */ - static int write_suspend_image(void) { int error; @@ -399,6 +544,7 @@ static int write_suspend_image(void) if ((error = close_swap())) goto FreePagedir; Done: + memset(key_iv, 0, MAXKEY+MAXIV); return error; FreePagedir: free_pagedir_entries(); @@ -590,18 +736,7 @@ static void copy_data_pages(void) static int calc_nr(int nr_copy) { - int extra = 0; - int mod = !!(nr_copy % PBES_PER_PAGE); - int diff = (nr_copy / PBES_PER_PAGE) + mod; - - do { - extra += diff; - nr_copy += diff; - mod = !!(nr_copy % PBES_PER_PAGE); - diff = (nr_copy / PBES_PER_PAGE) + mod - extra; - } while (diff > 0); - - return nr_copy; + return nr_copy + (nr_copy+PBES_PER_PAGE-2)/(PBES_PER_PAGE-1); } /** @@ -885,20 +1020,21 @@ int swsusp_suspend(void) * at resume time, and evil weirdness ensues. */ if ((error = device_power_down(PMSG_FREEZE))) { + printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); local_irq_enable(); return error; } if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try " - "swapon -a!\n"); + printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); + device_power_up(); local_irq_enable(); return error; } save_processor_state(); if ((error = swsusp_arch_suspend())) - printk("Error %d suspending\n", error); + printk(KERN_ERR "Error %d suspending\n", error); /* Restore control flow magically appears here */ restore_processor_state(); BUG_ON (nr_copy_pages_check != nr_copy_pages); @@ -1178,7 +1314,8 @@ static const char * sanity_check(void) if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) return "machine"; #if 0 - if(swsusp_info.cpus != num_online_cpus()) + /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */ + if (swsusp_info.cpus != num_possible_cpus()) return "number of cpus"; #endif return NULL; @@ -1211,13 +1348,14 @@ static int check_sig(void) return error; if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV); + memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV); /* * Reset swap signature now. */ error = bio_write_page(0, &swsusp_header); } else { - printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n"); return -EINVAL; } if (!error) @@ -1238,6 +1376,10 @@ static int data_read(struct pbe *pblist) int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; + void *tfm; + + if ((error = crypto_init(0, &tfm))) + return error; if (!mod) mod = 1; @@ -1249,19 +1391,18 @@ static int data_read(struct pbe *pblist) if (!(i % mod)) printk("\b\b\b\b%3d%%", i / mod); - error = bio_read_page(swp_offset(p->swap_address), - (void *)p->address); - if (error) + if ((error = crypto_read(p, tfm))) { + crypto_exit(tfm); return error; + } i++; } printk("\b\b\b\bdone\n"); + crypto_exit(tfm); return error; } -extern dev_t name_to_dev_t(const char *line); - /** * read_pagedir - Read page backup list pages from swap */ @@ -1386,6 +1527,7 @@ int swsusp_read(void) error = read_suspend_image(); blkdev_put(resume_bdev); + memset(key_iv, 0, MAXKEY+MAXIV); if (!error) pr_debug("swsusp: Reading resume file was successful\n"); diff --git a/kernel/sched.c b/kernel/sched.c index 4107db0dc091..5f889d0cbfcc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3378,8 +3378,8 @@ EXPORT_SYMBOL(set_user_nice); */ int can_nice(const task_t *p, const int nice) { - /* convert nice value [19,-20] to rlimit style value [0,39] */ - int nice_rlim = 19 - nice; + /* convert nice value [19,-20] to rlimit style value [1,40] */ + int nice_rlim = 20 - nice; return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || capable(CAP_SYS_NICE)); } @@ -3486,7 +3486,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) p->policy = policy; p->rt_priority = prio; if (policy != SCHED_NORMAL) - p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; + p->prio = MAX_RT_PRIO-1 - p->rt_priority; else p->prio = p->static_prio; } @@ -3518,7 +3518,8 @@ recheck: * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. */ if (param->sched_priority < 0 || - param->sched_priority > MAX_USER_RT_PRIO-1) + (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || + (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) return -EINVAL; if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) return -EINVAL; @@ -3528,7 +3529,8 @@ recheck: */ if (!capable(CAP_SYS_NICE)) { /* can't change policy */ - if (policy != p->policy) + if (policy != p->policy && + !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) return -EPERM; /* can't increase priority */ if (policy != SCHED_NORMAL && diff --git a/kernel/signal.c b/kernel/signal.c index ca1186eef938..d282fea81138 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -692,7 +692,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) { struct task_struct *t; - if (p->flags & SIGNAL_GROUP_EXIT) + if (p->signal->flags & SIGNAL_GROUP_EXIT) /* * The process is in the middle of dying already. */ diff --git a/kernel/sys.c b/kernel/sys.c index 9a24374c23bc..0bcaed6560ac 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -361,6 +361,64 @@ out_unlock: return retval; } +void emergency_restart(void) +{ + machine_emergency_restart(); +} +EXPORT_SYMBOL_GPL(emergency_restart); + +void kernel_restart(char *cmd) +{ + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + system_state = SYSTEM_RESTART; + device_shutdown(); + if (!cmd) { + printk(KERN_EMERG "Restarting system.\n"); + } else { + printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); + } + printk(".\n"); + machine_restart(cmd); +} +EXPORT_SYMBOL_GPL(kernel_restart); + +void kernel_kexec(void) +{ +#ifdef CONFIG_KEXEC + struct kimage *image; + image = xchg(&kexec_image, 0); + if (!image) { + return; + } + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); + system_state = SYSTEM_RESTART; + device_shutdown(); + printk(KERN_EMERG "Starting new kernel\n"); + machine_shutdown(); + machine_kexec(image); +#endif +} +EXPORT_SYMBOL_GPL(kernel_kexec); + +void kernel_halt(void) +{ + notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); + system_state = SYSTEM_HALT; + device_shutdown(); + printk(KERN_EMERG "System halted.\n"); + machine_halt(); +} +EXPORT_SYMBOL_GPL(kernel_halt); + +void kernel_power_off(void) +{ + notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); + system_state = SYSTEM_POWER_OFF; + device_shutdown(); + printk(KERN_EMERG "Power down.\n"); + machine_power_off(); +} +EXPORT_SYMBOL_GPL(kernel_power_off); /* * Reboot system call: for obvious reasons only root may call it, @@ -389,11 +447,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); - printk(KERN_EMERG "Restarting system.\n"); - machine_restart(NULL); + kernel_restart(NULL); break; case LINUX_REBOOT_CMD_CAD_ON: @@ -405,23 +459,13 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user break; case LINUX_REBOOT_CMD_HALT: - notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); - system_state = SYSTEM_HALT; - device_suspend(PMSG_SUSPEND); - device_shutdown(); - printk(KERN_EMERG "System halted.\n"); - machine_halt(); + kernel_halt(); unlock_kernel(); do_exit(0); break; case LINUX_REBOOT_CMD_POWER_OFF: - notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); - system_state = SYSTEM_POWER_OFF; - device_suspend(PMSG_SUSPEND); - device_shutdown(); - printk(KERN_EMERG "Power down.\n"); - machine_power_off(); + kernel_power_off(); unlock_kernel(); do_exit(0); break; @@ -433,32 +477,14 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user } buffer[sizeof(buffer) - 1] = '\0'; - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); - system_state = SYSTEM_RESTART; - device_suspend(PMSG_FREEZE); - device_shutdown(); - printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); - machine_restart(buffer); + kernel_restart(buffer); break; -#ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - { - struct kimage *image; - image = xchg(&kexec_image, 0); - if (!image) { - unlock_kernel(); - return -EINVAL; - } - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); - printk(KERN_EMERG "Starting new kernel\n"); - machine_shutdown(); - machine_kexec(image); - break; - } -#endif + kernel_kexec(); + unlock_kernel(); + return -EINVAL; + #ifdef CONFIG_SOFTWARE_SUSPEND case LINUX_REBOOT_CMD_SW_SUSPEND: { @@ -478,8 +504,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user static void deferred_cad(void *dummy) { - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - machine_restart(NULL); + kernel_restart(NULL); } /* diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 29196ce9b40f..1ab2370e2efa 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -79,7 +79,9 @@ cond_syscall(sys_request_key); cond_syscall(sys_keyctl); cond_syscall(compat_sys_keyctl); cond_syscall(compat_sys_socketcall); -cond_syscall(sys_set_zone_reclaim); +cond_syscall(sys_inotify_init); +cond_syscall(sys_inotify_add_watch); +cond_syscall(sys_inotify_rm_watch); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 270ee7fadbd8..8e56e2495542 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -31,6 +31,7 @@ #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/net.h> #include <linux/sysrq.h> #include <linux/highuid.h> #include <linux/writeback.h> @@ -114,6 +115,7 @@ extern int unaligned_enabled; extern int sysctl_ieee_emulation_warnings; #endif extern int sysctl_userprocess_debug; +extern int spin_retry; #endif extern int sysctl_hz_timer; @@ -135,9 +137,6 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table vm_table[]; -#ifdef CONFIG_NET -extern ctl_table net_table[]; -#endif static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; @@ -146,6 +145,9 @@ extern ctl_table random_table[]; #ifdef CONFIG_UNIX98_PTYS extern ctl_table pty_table[]; #endif +#ifdef CONFIG_INOTIFY +extern ctl_table inotify_table[]; +#endif #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT int sysctl_legacy_va_layout; @@ -218,6 +220,7 @@ static ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, + { .ctl_name = 0 } }; @@ -643,7 +646,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, - +#if defined(CONFIG_ARCH_S390) + { + .ctl_name = KERN_SPIN_RETRY, + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0 } }; @@ -950,6 +962,14 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_INOTIFY + { + .ctl_name = FS_INOTIFY, + .procname = "inotify", + .mode = 0555, + .child = inotify_table, + }, +#endif #endif { .ctl_name = KERN_SETUID_DUMPABLE, @@ -968,7 +988,7 @@ static ctl_table debug_table[] = { static ctl_table dev_table[] = { { .ctl_name = 0 } -}; +}; extern void init_irq_proc (void); diff --git a/kernel/time.c b/kernel/time.c index d4335c1c884c..dd5ae1162a8f 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -128,7 +128,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __us * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks. */ -inline static void warp_clock(void) +static inline void warp_clock(void) { write_seqlock_irq(&xtime_lock); wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; diff --git a/kernel/timer.c b/kernel/timer.c index f2a11887a726..5377f40723ff 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1023,7 +1023,7 @@ asmlinkage long sys_getppid(void) parent = me->group_leader->real_parent; for (;;) { pid = parent->tgid; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) { struct task_struct *old = parent; diff --git a/kernel/user.c b/kernel/user.c index 734575d55769..89e562feb1b1 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -120,6 +120,10 @@ struct user_struct * alloc_uid(uid_t uid) atomic_set(&new->processes, 0); atomic_set(&new->files, 0); atomic_set(&new->sigpending, 0); +#ifdef CONFIG_INOTIFY + atomic_set(&new->inotify_watches, 0); + atomic_set(&new->inotify_devs, 0); +#endif new->mq_bytes = 0; new->locked_shm = 0; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 259cf55da3c9..c7e36d4a70ca 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -308,8 +308,6 @@ struct workqueue_struct *__create_workqueue(const char *name, struct workqueue_struct *wq; struct task_struct *p; - BUG_ON(strlen(name) > 10); - wq = kmalloc(sizeof(*wq), GFP_KERNEL); if (!wq) return NULL; |
