summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/audit.c 3
-rw-r--r-- kernel/capability.c 20
-rw-r--r-- kernel/cpuset.c 107
-rw-r--r-- kernel/crash_dump.c 11
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- kernel/fork.c 7
-rw-r--r-- kernel/itimer.c 37
-rw-r--r-- kernel/module.c 15
-rw-r--r-- kernel/panic.c 9
-rw-r--r-- kernel/posix-timers.c 18
-rw-r--r-- kernel/power/Kconfig 12
-rw-r--r-- kernel/power/disk.c 66
-rw-r--r-- kernel/power/main.c 21
-rw-r--r-- kernel/power/poweroff.c 4
-rw-r--r-- kernel/power/process.c 29
-rw-r--r-- kernel/power/smp.c 2
-rw-r--r-- kernel/power/swsusp.c 204
-rw-r--r-- kernel/sched.c 12
-rw-r--r-- kernel/signal.c 2
-rw-r--r-- kernel/sys.c 109
-rw-r--r-- kernel/sys_ni.c 4
-rw-r--r-- kernel/sysctl.c 30
-rw-r--r-- kernel/time.c 2
-rw-r--r-- kernel/timer.c 2
-rw-r--r-- kernel/user.c 4
-rw-r--r-- kernel/workqueue.c 2
26 files changed, 522 insertions, 214 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index ef35166fdc29..7f0699790d46 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -514,7 +514,8 @@ static int __init audit_init(void)
{
printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
- audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive);
+ audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
+ THIS_MODULE);
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
diff --git a/kernel/capability.c b/kernel/capability.c
index 64db1ee820c2..8986a37a67ea 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -31,8 +31,14 @@ static DEFINE_SPINLOCK(task_capability_lock);
* uninteresting and/or not to be changed.
*/
-/*
+/**
* sys_capget - get the capabilities of a given process.
+ * @header: pointer to struct that contains capability version and
+ * target pid data
+ * @dataptr: pointer to struct that contains the effective, permitted,
+ * and inheritable capabilities that are returned
+ *
+ * Returns 0 on success and < 0 on error.
*/
asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
@@ -141,8 +147,14 @@ static inline int cap_set_all(kernel_cap_t *effective,
return ret;
}
-/*
- * sys_capset - set capabilities for a given process, all processes, or all
+/**
+ * sys_capset - set capabilities for a process or a group of processes
+ * @header: pointer to struct that contains capability version and
+ * target pid data
+ * @data: pointer to struct that contains the effective, permitted,
+ * and inheritable capabilities
+ *
+ * Set capabilities for a given process, all processes, or all
* processes in a given process group.
*
* The restrictions on setting capabilities are specified as:
@@ -152,6 +164,8 @@ static inline int cap_set_all(kernel_cap_t *effective,
* I: any raised capabilities must be a subset of the (old current) permitted
* P: any raised capabilities must be a subset of the (old current) permitted
* E: must be set to a subset of (new target) permitted
+ *
+ * Returns 0 on success and < 0 on error.
*/
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 984c0bf3807f..8ab1b4e518b8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
* to continue to serve a useful existence. Next time it's released,
* we will get notified again, if it still has 'notify_on_release' set.
*
- * Note final arg to call_usermodehelper() is 0 - that means
- * don't wait. Since we are holding the global cpuset_sem here,
- * and we are asking another thread (started from keventd) to rmdir a
- * cpuset, we can't wait - or we'd deadlock with the removing thread
- * on cpuset_sem.
+ * The final arg to call_usermodehelper() is 0, which means don't
+ * wait. The separate /sbin/cpuset_release_agent task is forked by
+ * call_usermodehelper(), then control in this thread returns here,
+ * without waiting for the release agent task. We don't bother to
+ * wait because the caller of this routine has no use for the exit
+ * status of the /sbin/cpuset_release_agent task, so no sense holding
+ * our caller up for that.
+ *
+ * The simple act of forking that task might require more memory,
+ * which might need cpuset_sem. So this routine must be called while
+ * cpuset_sem is not held, to avoid a possible deadlock. See also
+ * comments for check_for_release(), below.
*/
-static int cpuset_release_agent(char *cpuset_str)
+static void cpuset_release_agent(const char *pathbuf)
{
char *argv[3], *envp[3];
int i;
+ if (!pathbuf)
+ return;
+
i = 0;
argv[i++] = "/sbin/cpuset_release_agent";
- argv[i++] = cpuset_str;
+ argv[i++] = (char *)pathbuf;
argv[i] = NULL;
i = 0;
@@ -421,17 +431,29 @@ static int cpuset_release_agent(char *cpuset_str)
envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[i] = NULL;
- return call_usermodehelper(argv[0], argv, envp, 0);
+ call_usermodehelper(argv[0], argv, envp, 0);
+ kfree(pathbuf);
}
/*
* Either cs->count of using tasks transitioned to zero, or the
* cs->children list of child cpusets just became empty. If this
* cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, send notice to user land.
+ * the list of children is empty, prepare cpuset path in a kmalloc'd
+ * buffer, to be returned via ppathbuf, so that the caller can invoke
+ * cpuset_release_agent() with it later on, once cpuset_sem is dropped.
+ * Call here with cpuset_sem held.
+ *
+ * This check_for_release() routine is responsible for kmalloc'ing
+ * pathbuf. The above cpuset_release_agent() is responsible for
+ * kfree'ing pathbuf. The caller of these routines is responsible
+ * for providing a pathbuf pointer, initialized to NULL, then
+ * calling check_for_release() with cpuset_sem held and the address
+ * of the pathbuf pointer, then dropping cpuset_sem, then calling
+ * cpuset_release_agent() with pathbuf, as set by check_for_release().
*/
-static void check_for_release(struct cpuset *cs)
+static void check_for_release(struct cpuset *cs, char **ppathbuf)
{
if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
list_empty(&cs->children)) {
@@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs)
if (!buf)
return;
if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
- goto out;
- cpuset_release_agent(buf);
-out:
- kfree(buf);
+ kfree(buf);
+ else
+ *ppathbuf = buf;
}
}
@@ -606,6 +627,14 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
* Call with cpuset_sem held. May nest a call to the
* lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
*/
+
+/*
+ * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
+ * Disable letting 'cpu_exclusive' cpusets define dynamic sched
+ * domains, until the sched domain can handle partial nodes.
+ * Remove this #if hackery when sched domains fixed.
+ */
+#if 0
static void update_cpu_domains(struct cpuset *cur)
{
struct cpuset *c, *par = cur->parent;
@@ -646,6 +675,11 @@ static void update_cpu_domains(struct cpuset *cur)
partition_sched_domains(&pspan, &cspan);
unlock_cpu_hotplug();
}
+#else
+static void update_cpu_domains(struct cpuset *cur)
+{
+	/*
+	 * Deliberately empty: this is the stub selected by the #else arm
+	 * while the real implementation is compiled out under "#if 0".
+	 * Callers may keep calling it unconditionally; it does nothing.
+	 */
+}
+#endif
static int update_cpumask(struct cpuset *cs, char *buf)
{
@@ -727,14 +761,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
return 0;
}
-static int attach_task(struct cpuset *cs, char *buf)
+static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
{
pid_t pid;
struct task_struct *tsk;
struct cpuset *oldcs;
cpumask_t cpus;
- if (sscanf(buf, "%d", &pid) != 1)
+ if (sscanf(pidbuf, "%d", &pid) != 1)
return -EIO;
if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
@@ -777,7 +811,7 @@ static int attach_task(struct cpuset *cs, char *buf)
put_task_struct(tsk);
if (atomic_dec_and_test(&oldcs->count))
- check_for_release(oldcs);
+ check_for_release(oldcs, ppathbuf);
return 0;
}
@@ -801,6 +835,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
struct cftype *cft = __d_cft(file->f_dentry);
cpuset_filetype_t type = cft->private;
char *buffer;
+ char *pathbuf = NULL;
int retval = 0;
/* Crude upper limit on largest legitimate cpulist user might write. */
@@ -841,7 +876,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
break;
case FILE_TASKLIST:
- retval = attach_task(cs, buffer);
+ retval = attach_task(cs, buffer, &pathbuf);
break;
default:
retval = -EINVAL;
@@ -852,6 +887,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
retval = nbytes;
out2:
up(&cpuset_sem);
+ cpuset_release_agent(pathbuf);
out1:
kfree(buffer);
return retval;
@@ -1357,6 +1393,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
struct cpuset *cs = dentry->d_fsdata;
struct dentry *d;
struct cpuset *parent;
+ char *pathbuf = NULL;
/* the vfs holds both inode->i_sem already */
@@ -1376,7 +1413,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
update_cpu_domains(cs);
list_del(&cs->sibling); /* delete my sibling from parent->children */
if (list_empty(&parent->children))
- check_for_release(parent);
+ check_for_release(parent, &pathbuf);
spin_lock(&cs->dentry->d_lock);
d = dget(cs->dentry);
cs->dentry = NULL;
@@ -1384,6 +1421,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
cpuset_d_remove_dir(d);
dput(d);
up(&cpuset_sem);
+ cpuset_release_agent(pathbuf);
return 0;
}
@@ -1440,10 +1478,10 @@ void __init cpuset_init_smp(void)
/**
* cpuset_fork - attach newly forked task to its parents cpuset.
- * @p: pointer to task_struct of forking parent process.
+ * @tsk: pointer to task_struct of forking parent process.
*
* Description: By default, on fork, a task inherits its
- * parents cpuset. The pointer to the shared cpuset is
+ * parent's cpuset. The pointer to the shared cpuset is
* automatically copied in fork.c by dup_task_struct().
* This cpuset_fork() routine need only increment the usage
* counter in that cpuset.
@@ -1471,7 +1509,6 @@ void cpuset_fork(struct task_struct *tsk)
* by the cpuset_sem semaphore. If you don't hold cpuset_sem,
* then a zero cpuset use count is a license to any other task to
* nuke the cpuset immediately.
- *
**/
void cpuset_exit(struct task_struct *tsk)
@@ -1484,10 +1521,13 @@ void cpuset_exit(struct task_struct *tsk)
task_unlock(tsk);
if (notify_on_release(cs)) {
+ char *pathbuf = NULL;
+
down(&cpuset_sem);
if (atomic_dec_and_test(&cs->count))
- check_for_release(cs);
+ check_for_release(cs, &pathbuf);
up(&cpuset_sem);
+ cpuset_release_agent(pathbuf);
} else {
atomic_dec(&cs->count);
}
@@ -1521,7 +1561,9 @@ void cpuset_init_current_mems_allowed(void)
current->mems_allowed = NODE_MASK_ALL;
}
-/*
+/**
+ * cpuset_update_current_mems_allowed - update mems parameters to new values
+ *
* If the current tasks cpusets mems_allowed changed behind our backs,
* update current->mems_allowed and mems_generation to the new value.
* Do not call this routine if in_interrupt().
@@ -1540,13 +1582,20 @@ void cpuset_update_current_mems_allowed(void)
}
}
+/**
+ * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
+ * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
+ *
+ * The bitmap is modified in place: over MAX_NUMNODES bits, a bit stays
+ * set only if it is also set in current->mems_allowed.
+ */
void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
{
bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
MAX_NUMNODES);
}
-/*
+/**
+ * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed
+ * @zl: the zonelist to be checked
+ *
* Are any of the nodes on zonelist zl allowed in current->mems_allowed?
*/
int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
@@ -1562,8 +1611,12 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
return 0;
}
-/*
- * Is 'current' valid, and is zone z allowed in current->mems_allowed?
+/**
+ * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
+ * @z: zone in question
+ *
+ * Is zone z allowed in current->mems_allowed, or is
+ * the CPU in interrupt context? (zone is always allowed in this case)
*/
int cpuset_zone_allowed(struct zone *z)
{
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 459ba49e376a..334c37f5218a 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -18,7 +18,16 @@
/* Stores the physical address of elf header of crash image. */
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-/*
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ * space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ * otherwise @buf is in kernel address space, use memcpy().
+ *
* Copy a page from "oldmem". For this page, there is no pte mapped
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
*/
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d1b10ed0135..5b0fb9f09f21 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -829,8 +829,10 @@ fastcall NORET_TYPE void do_exit(long code)
acct_update_integrals(tsk);
update_mem_hiwater(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
- if (group_dead)
+ if (group_dead) {
+ del_timer_sync(&tsk->signal->real_timer);
acct_process(code);
+ }
exit_mm(tsk);
exit_sem(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index cdef6cea8900..7e1ead9a6ba4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -208,8 +208,10 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
+ long pages = vma_pages(mpnt);
+ mm->total_vm -= pages;
__vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
- -vma_pages(mpnt));
+ -pages);
continue;
}
charge = 0;
@@ -992,6 +994,9 @@ static task_t *copy_process(unsigned long clone_flags,
* of CLONE_PTRACE.
*/
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index a72cb0e5aa4b..7c1b25e25e47 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -112,28 +112,11 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
return error;
}
-/*
- * Called with P->sighand->siglock held and P->signal->real_timer inactive.
- * If interval is nonzero, arm the timer for interval ticks from now.
- */
-static inline void it_real_arm(struct task_struct *p, unsigned long interval)
-{
- p->signal->it_real_value = interval; /* XXX unnecessary field?? */
- if (interval == 0)
- return;
- if (interval > (unsigned long) LONG_MAX)
- interval = LONG_MAX;
- /* the "+ 1" below makes sure that the timer doesn't go off before
- * the interval requested. This could happen if
- * time requested % (usecs per jiffy) is more than the usecs left
- * in the current jiffy */
- p->signal->real_timer.expires = jiffies + interval + 1;
- add_timer(&p->signal->real_timer);
-}
void it_real_fn(unsigned long __data)
{
struct task_struct * p = (struct task_struct *) __data;
+ unsigned long inc = p->signal->it_real_incr;
send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);
@@ -141,14 +124,23 @@ void it_real_fn(unsigned long __data)
* Now restart the timer if necessary. We don't need any locking
* here because do_setitimer makes sure we have finished running
* before it touches anything.
+ * Note, we KNOW we are (or should be) at a jiffie edge here so
+ * we don't need the +1 stuff. Also, we want to use the prior
+ * expire value so as to not "slip" a jiffie if we are late.
+ * Deal with requesting a time prior to "now" here rather than
+ * in add_timer.
*/
- it_real_arm(p, p->signal->it_real_incr);
+ if (!inc)
+ return;
+ while (time_before_eq(p->signal->real_timer.expires, jiffies))
+ p->signal->real_timer.expires += inc;
+ add_timer(&p->signal->real_timer);
}
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
struct task_struct *tsk = current;
- unsigned long val, interval;
+ unsigned long val, interval, expires;
cputime_t cval, cinterval, nval, ninterval;
switch (which) {
@@ -164,7 +156,10 @@ again:
}
tsk->signal->it_real_incr =
timeval_to_jiffies(&value->it_interval);
- it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
+ expires = timeval_to_jiffies(&value->it_value);
+ if (expires)
+ mod_timer(&tsk->signal->real_timer,
+ jiffies + 1 + expires);
spin_unlock_irq(&tsk->sighand->siglock);
if (ovalue) {
jiffies_to_timeval(val, &ovalue->it_value);
diff --git a/kernel/module.c b/kernel/module.c
index 068e271ab3a5..c32995fbd8fd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -250,13 +250,18 @@ static inline unsigned int block_size(int val)
/* Created by linker magic */
extern char __per_cpu_start[], __per_cpu_end[];
-static void *percpu_modalloc(unsigned long size, unsigned long align)
+static void *percpu_modalloc(unsigned long size, unsigned long align,
+ const char *name)
{
unsigned long extra;
unsigned int i;
void *ptr;
- BUG_ON(align > SMP_CACHE_BYTES);
+ if (align > SMP_CACHE_BYTES) {
+ printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
+ name, align, SMP_CACHE_BYTES);
+ align = SMP_CACHE_BYTES;
+ }
ptr = __per_cpu_start;
for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -348,7 +353,8 @@ static int percpu_modinit(void)
}
__initcall(percpu_modinit);
#else /* ... !CONFIG_SMP */
-static inline void *percpu_modalloc(unsigned long size, unsigned long align)
+static inline void *percpu_modalloc(unsigned long size, unsigned long align,
+ const char *name)
{
return NULL;
}
@@ -1644,7 +1650,8 @@ static struct module *load_module(void __user *umod,
if (pcpuindex) {
/* We have a special allocation for this section. */
percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
- sechdrs[pcpuindex].sh_addralign);
+ sechdrs[pcpuindex].sh_addralign,
+ mod->name);
if (!percpu) {
err = -ENOMEM;
goto free_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index 74ba5f3e46c7..aabc5f86fa3f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -111,12 +111,11 @@ NORET_TYPE void panic(const char * fmt, ...)
mdelay(1);
i++;
}
- /*
- * Should we run the reboot notifier. For the moment Im
- * choosing not too. It might crash, be corrupt or do
- * more harm than good for other reasons.
+ /* This will not be a clean reboot, with everything
+ * shutting down. But if there is a chance of
+ * rebooting the system it will be rebooted.
*/
- machine_restart(NULL);
+ emergency_restart();
}
#ifdef __sparc__
{
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5b7b4736d82b..38798a2ff994 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -896,21 +896,10 @@ static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
jiffies_64_f = get_jiffies_64();
}
/*
- * Take away now to get delta
+ * Take away now to get delta and normalize
*/
- oc.tv_sec -= now.tv_sec;
- oc.tv_nsec -= now.tv_nsec;
- /*
- * Normalize...
- */
- while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) {
- oc.tv_nsec -= NSEC_PER_SEC;
- oc.tv_sec++;
- }
- while ((oc.tv_nsec) < 0) {
- oc.tv_nsec += NSEC_PER_SEC;
- oc.tv_sec--;
- }
+ set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec,
+ oc.tv_nsec - now.tv_nsec);
}else{
jiffies_64_f = get_jiffies_64();
}
@@ -1177,7 +1166,6 @@ void exit_itimers(struct signal_struct *sig)
tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
itimer_delete(tmr);
}
- del_timer_sync(&sig->real_timer);
}
/*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 2c7121d9bff1..917066a5767c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -72,6 +72,18 @@ config PM_STD_PARTITION
suspended image to. It will simply pick the first available swap
device.
+config SWSUSP_ENCRYPT
+ bool "Encrypt suspend image"
+ depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y)
+ default ""
+ ---help---
+ To prevent data gathering from swap after resume you can encrypt
+ the suspend image with a temporary key that is deleted on
+ resume.
+
+ Note that the temporary key is stored unencrypted on disk while the
+ system is suspended.
+
config SUSPEND_SMP
bool
depends on HOTPLUG_CPU && X86 && PM
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c51a4d96d4eb..2d8bf054d036 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -16,6 +16,8 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
+#include <linux/mount.h>
+
#include "power.h"
@@ -57,16 +59,13 @@ static void power_down(suspend_disk_method_t mode)
error = pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
- printk("Powering off system\n");
- device_shutdown();
- machine_power_off();
+ kernel_power_off();
break;
case PM_DISK_REBOOT:
- device_shutdown();
- machine_restart(NULL);
+ kernel_restart(NULL);
break;
}
- machine_halt();
+ kernel_halt();
/* Valid image is on the disk, if we continue we risk serious data corruption
after resume. */
printk(KERN_CRIT "Please power me down manually\n");
@@ -113,24 +112,12 @@ static inline void platform_finish(void)
}
}
-static void finish(void)
-{
- device_resume();
- platform_finish();
- thaw_processes();
- enable_nonboot_cpus();
- pm_restore_console();
-}
-
-
static int prepare_processes(void)
{
int error;
pm_prepare_console();
-
sys_sync();
-
disable_nonboot_cpus();
if (freeze_processes()) {
@@ -163,15 +150,6 @@ static void unprepare_processes(void)
pm_restore_console();
}
-static int prepare_devices(void)
-{
- int error;
-
- if ((error = device_suspend(PMSG_FREEZE)))
- printk("Some devices failed to suspend\n");
- return error;
-}
-
/**
* pm_suspend_disk - The granpappy of power management.
*
@@ -188,17 +166,14 @@ int pm_suspend_disk(void)
error = prepare_processes();
if (error)
return error;
- error = prepare_devices();
+ error = device_suspend(PMSG_FREEZE);
if (error) {
+ printk("Some devices failed to suspend\n");
unprepare_processes();
return error;
}
- pr_debug("PM: Attempting to suspend to disk.\n");
- if (pm_disk_mode == PM_DISK_FIRMWARE)
- return pm_ops->enter(PM_SUSPEND_DISK);
-
pr_debug("PM: snapshotting memory.\n");
in_suspend = 1;
if ((error = swsusp_suspend()))
@@ -209,11 +184,20 @@ int pm_suspend_disk(void)
error = swsusp_write();
if (!error)
power_down(pm_disk_mode);
+ else {
+ /* swsusp_write can not fail in device_resume,
+ no need to do second device_resume */
+ swsusp_free();
+ unprepare_processes();
+ return error;
+ }
} else
pr_debug("PM: Image restored successfully.\n");
+
swsusp_free();
Done:
- finish();
+ device_resume();
+ unprepare_processes();
return error;
}
@@ -234,9 +218,12 @@ static int software_resume(void)
{
int error;
+ down(&pm_sem);
if (!swsusp_resume_device) {
- if (!strlen(resume_file))
+ if (!strlen(resume_file)) {
+ up(&pm_sem);
return -ENOENT;
+ }
swsusp_resume_device = name_to_dev_t(resume_file);
pr_debug("swsusp: Resume From Partition %s\n", resume_file);
} else {
@@ -249,6 +236,7 @@ static int software_resume(void)
* FIXME: If noresume is specified, we need to find the partition
* and reset it back to normal swap space.
*/
+ up(&pm_sem);
return 0;
}
@@ -271,20 +259,24 @@ static int software_resume(void)
pr_debug("PM: Preparing devices for restore.\n");
- if ((error = prepare_devices()))
+ if ((error = device_suspend(PMSG_FREEZE))) {
+ printk("Some devices failed to suspend\n");
goto Free;
+ }
mb();
pr_debug("PM: Restoring saved image.\n");
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
- finish();
+ device_resume();
Free:
swsusp_free();
Cleanup:
unprepare_processes();
Done:
+ /* For success case, the suspend path will release the lock */
+ up(&pm_sem);
pr_debug("PM: Resume from disk failed.\n");
return 0;
}
@@ -391,7 +383,9 @@ static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t
if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
res = MKDEV(maj,min);
if (maj == MAJOR(res) && min == MINOR(res)) {
+ down(&pm_sem);
swsusp_resume_device = res;
+ up(&pm_sem);
printk("Attempting manual resume\n");
noresume = 0;
software_resume();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index c94cb9e95090..22bdc93cc038 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -19,6 +19,9 @@
#include "power.h"
+/* Minimum number of free pages required before suspending; the value is arbitrary. */
+#define FREE_PAGE_NUMBER (100)
+
DECLARE_MUTEX(pm_sem);
struct pm_ops * pm_ops = NULL;
@@ -49,6 +52,7 @@ void pm_set_ops(struct pm_ops * ops)
static int suspend_prepare(suspend_state_t state)
{
int error = 0;
+ unsigned int free_pages;
if (!pm_ops || !pm_ops->enter)
return -EPERM;
@@ -67,6 +71,16 @@ static int suspend_prepare(suspend_state_t state)
goto Thaw;
}
+ if ((free_pages = nr_free_pages()) < FREE_PAGE_NUMBER) {
+ pr_debug("PM: free some memory\n");
+ shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
+ if (nr_free_pages() < FREE_PAGE_NUMBER) {
+ error = -ENOMEM;
+ printk(KERN_ERR "PM: No enough memory\n");
+ goto Thaw;
+ }
+ }
+
if (pm_ops->prepare) {
if ((error = pm_ops->prepare(state)))
goto Thaw;
@@ -129,11 +143,12 @@ static void suspend_finish(suspend_state_t state)
-static char * pm_states[] = {
+static char *pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
+#ifdef CONFIG_SOFTWARE_SUSPEND
[PM_SUSPEND_DISK] = "disk",
- NULL,
+#endif
};
@@ -194,7 +209,7 @@ int software_suspend(void)
int pm_suspend(suspend_state_t state)
{
+	/*
+	 * Only states strictly between PM_SUSPEND_ON and PM_SUSPEND_MAX are
+	 * accepted.  PM_SUSPEND_MAX is the number of entries in pm_states[],
+	 * i.e. one past the last valid state, so it must be rejected here
+	 * with -EINVAL rather than being handed to enter_state().
+	 */
if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX)
return enter_state(state);
return -EINVAL;
}
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 715081b2d829..7a4144ba3afd 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/workqueue.h>
+#include <linux/reboot.h>
/*
* When the user hits Sys-Rq o to power down the machine this is the
@@ -17,8 +18,7 @@
static void do_poweroff(void *dummy)
{
- if (pm_power_off)
- pm_power_off();
+ kernel_power_off();
}
static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 3bd0d261818f..28de118f7a0b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -38,7 +38,6 @@ void refrigerator(void)
processes around? */
long save;
save = current->state;
- current->state = TASK_UNINTERRUPTIBLE;
pr_debug("%s entered refrigerator\n", current->comm);
printk("=");
@@ -47,8 +46,10 @@ void refrigerator(void)
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
- while (frozen(current))
+ while (frozen(current)) {
+ current->state = TASK_UNINTERRUPTIBLE;
schedule();
+ }
pr_debug("%s left refrigerator\n", current->comm);
current->state = save;
}
@@ -80,13 +81,33 @@ int freeze_processes(void)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
- if (time_after(jiffies, start_time + TIMEOUT)) {
+ if (todo && time_after(jiffies, start_time + TIMEOUT)) {
printk( "\n" );
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
- return todo;
+ break;
}
} while(todo);
+ /* This does not unfreeze processes that are already frozen
+ * (we have slightly ugly calling convention in that respect,
+ * and caller must call thaw_processes() if something fails),
+ * but it cleans up leftover PF_FREEZE requests.
+ */
+ if (todo) {
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p)
+ if (freezing(p)) {
+ pr_debug(" clean up: %s\n", p->comm);
+ p->flags &= ~PF_FREEZE;
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ recalc_sigpending_tsk(p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ }
+ while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
+ return todo;
+ }
+
printk( "|\n" );
BUG_ON(in_atomic());
return 0;
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
index bbe23079c62c..911fc62b8225 100644
--- a/kernel/power/smp.c
+++ b/kernel/power/smp.c
@@ -38,7 +38,7 @@ void disable_nonboot_cpus(void)
}
printk("Error taking cpu %d down: %d\n", cpu, error);
}
- BUG_ON(smp_processor_id() != 0);
+ BUG_ON(raw_smp_processor_id() != 0);
if (error)
panic("cpus not sleeping");
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 7d7801cd01f0..eaacd5cb5889 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -31,6 +31,9 @@
* Alex Badea <vampire@go.ro>:
* Fixed runaway init
*
+ * Andreas Steinmetz <ast@domdv.de>:
+ * Added encrypted suspend option
+ *
* More state savers are welcome. Especially for the scsi layer...
*
* For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
@@ -63,6 +66,7 @@
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/bio.h>
+#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -70,8 +74,16 @@
#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <asm/scatterlist.h>
+
#include "power.h"
+#define CIPHER "aes"
+#define MAXKEY 32
+#define MAXIV 32
+
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
@@ -102,7 +114,8 @@ static suspend_pagedir_t *pagedir_save;
#define SWSUSP_SIG "S1SUSPEND"
static struct swsusp_header {
- char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
+ char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)];
+ u8 key_iv[MAXKEY+MAXIV];
swp_entry_t swsusp_info;
char orig_sig[10];
char sig[10];
@@ -128,6 +141,131 @@ static struct swsusp_info swsusp_info;
static unsigned short swapfile_used[MAX_SWAPFILES];
static unsigned short root_swap;
+static int write_page(unsigned long addr, swp_entry_t * loc);
+static int bio_read_page(pgoff_t page_off, void * page);
+
+static u8 key_iv[MAXKEY+MAXIV];
+
+#ifdef CONFIG_SWSUSP_ENCRYPT
+
+/*
+ * crypto_init - allocate and set up the cipher used for the suspend image.
+ * @mode: non-zero on the suspend side, where key_iv is first filled with
+ *        random bytes; zero on the resume side, where key_iv is used as
+ *        it currently stands.
+ * @mem:  on success, receives the allocated transform as an opaque pointer.
+ *
+ * Returns 0 on success or a negative errno.  On failure *mem is not
+ * written and any transform that was allocated has been freed again.
+ */
+static int crypto_init(int mode, void **mem)
+{
+	const char *why = mode ? "suspend not possible" : "resume not possible";
+	struct crypto_tfm *cipher;
+	int key_len, iv_len;
+	int error;
+
+	cipher = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC);
+	if (!cipher) {
+		printk(KERN_ERR "swsusp: no tfm, %s\n", why);
+		return -EINVAL;
+	}
+
+	if (crypto_tfm_alg_min_keysize(cipher) > MAXKEY) {
+		printk(KERN_ERR "swsusp: key buffer too small, %s\n", why);
+		error = -ENOKEY;
+		goto free_cipher;
+	}
+
+	/* Only the suspend side generates a fresh key and IV. */
+	if (mode)
+		get_random_bytes(key_iv, MAXKEY+MAXIV);
+
+	/* Take the algorithm's maximum key size, capped at MAXKEY. */
+	key_len = crypto_tfm_alg_max_keysize(cipher);
+	if (key_len > MAXKEY)
+		key_len = MAXKEY;
+
+	if (crypto_cipher_setkey(cipher, key_iv, key_len)) {
+		printk(KERN_ERR "swsusp: key setup failure, %s\n", why);
+		error = -EKEYREJECTED;
+		goto free_cipher;
+	}
+
+	iv_len = crypto_tfm_alg_ivsize(cipher);
+	if (iv_len > MAXIV) {
+		printk(KERN_ERR "swsusp: iv buffer too small, %s\n", why);
+		error = -EOVERFLOW;
+		goto free_cipher;
+	}
+
+	/* The IV lives in key_iv directly after the MAXKEY key bytes. */
+	crypto_cipher_set_iv(cipher, key_iv+MAXKEY, iv_len);
+	*mem = cipher;
+	return 0;
+
+free_cipher:
+	crypto_free_tfm(cipher);
+	return error;
+}
+
+/* crypto_exit - free the transform handed out through crypto_init()'s @mem. */
+static __inline__ void crypto_exit(void *mem)
+{
+	struct crypto_tfm *cipher = mem;
+
+	crypto_free_tfm(cipher);
+}
+
+/*
+ * crypto_write - encrypt one image page and write the result to swap.
+ * @p:   page backup entry; p->address is the page to encrypt and
+ *       p->swap_address is passed to write_page() for the swap location.
+ * @mem: opaque transform pointer obtained from crypto_init().
+ *
+ * The page is encrypted into the page holding swsusp_header, which is
+ * used as the output buffer here (its previous contents are overwritten),
+ * and that buffer is then handed to write_page().
+ *
+ * Returns 0 on success, otherwise the error from the cipher or from
+ * write_page().
+ */
+static __inline__ int crypto_write(struct pbe *p, void *mem)
+{
+	struct crypto_tfm *cipher = mem;
+	struct scatterlist plain, scratch;
+	int error;
+
+	plain.page = virt_to_page(p->address);
+	plain.offset = 0;
+	plain.length = PAGE_SIZE;
+
+	scratch.page = virt_to_page((void *)&swsusp_header);
+	scratch.offset = 0;
+	scratch.length = PAGE_SIZE;
+
+	error = crypto_cipher_encrypt(cipher, &scratch, &plain, PAGE_SIZE);
+	if (error)
+		return error;
+
+	return write_page((unsigned long)&swsusp_header, &p->swap_address);
+}
+
+/*
+ * crypto_read - read one image page from swap and decrypt it in place.
+ * @p:   page backup entry; the page at swp_offset(p->swap_address) is read
+ *       into p->address.
+ * @mem: opaque transform pointer obtained from crypto_init().
+ *
+ * Returns 0 on success, otherwise the error from bio_read_page() or from
+ * the cipher.  Nothing is decrypted if the read fails.
+ */
+static __inline__ int crypto_read(struct pbe *p, void *mem)
+{
+	struct crypto_tfm *cipher = mem;
+	struct scatterlist in, out;
+	int error;
+
+	error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
+	if (error)
+		return error;
+
+	/* Source and destination describe the same page. */
+	in.page = virt_to_page((void *)p->address);
+	in.offset = 0;
+	in.length = PAGE_SIZE;
+
+	out.page = in.page;
+	out.offset = 0;
+	out.length = PAGE_SIZE;
+
+	return crypto_cipher_decrypt(cipher, &out, &in, PAGE_SIZE);
+}
+#else
+/*
+ * crypto_init - stub used when CONFIG_SWSUSP_ENCRYPT is not set.
+ * @mode: ignored.
+ * @mem:  receives NULL, so the caller's handle has a defined value when
+ *        it is later passed to crypto_write()/crypto_read()/crypto_exit().
+ *
+ * The prototype matches the CONFIG_SWSUSP_ENCRYPT version (void **mem) so
+ * that callers passing the address of a "void *" are type-correct in both
+ * configurations.  Always returns 0.
+ */
+static __inline__ int crypto_init(int mode, void **mem)
+{
+	*mem = NULL;
+	return 0;
+}
+
+static __inline__ void crypto_exit(void *mem)
+{
+	/* Stub for the #else (non-CONFIG_SWSUSP_ENCRYPT) build: nothing to release. */
+}
+
+/*
+ * crypto_write - stub for the non-CONFIG_SWSUSP_ENCRYPT build: the page is
+ * passed to write_page() unmodified.  @mem is unused.
+ */
+static __inline__ int crypto_write(struct pbe *p, void *mem)
+{
+	swp_entry_t *slot = &p->swap_address;
+
+	return write_page(p->address, slot);
+}
+
+/*
+ * crypto_read - stub for the non-CONFIG_SWSUSP_ENCRYPT build: the page is
+ * read straight into p->address with bio_read_page().  @mem is unused.
+ */
+static __inline__ int crypto_read(struct pbe *p, void *mem)
+{
+	void *page = (void *)p->address;
+
+	return bio_read_page(swp_offset(p->swap_address), page);
+}
+#endif
+
static int mark_swapfiles(swp_entry_t prev)
{
int error;
@@ -139,6 +277,7 @@ static int mark_swapfiles(swp_entry_t prev)
!memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
+ memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV);
swsusp_header.swsusp_info = prev;
error = rw_swap_page_sync(WRITE,
swp_entry(root_swap, 0),
@@ -178,9 +317,9 @@ static int swsusp_swap_check(void) /* This is called before saving image */
len=strlen(resume_file);
root_swap = 0xFFFF;
- swap_list_lock();
+ spin_lock(&swap_lock);
for (i=0; i<MAX_SWAPFILES; i++) {
- if (swap_info[i].flags == 0) {
+ if (!(swap_info[i].flags & SWP_WRITEOK)) {
swapfile_used[i]=SWAPFILE_UNUSED;
} else {
if (!len) {
@@ -201,7 +340,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */
}
}
}
- swap_list_unlock();
+ spin_unlock(&swap_lock);
return (root_swap != 0xffff) ? 0 : -ENODEV;
}
@@ -215,12 +354,12 @@ static void lock_swapdevices(void)
{
int i;
- swap_list_lock();
+ spin_lock(&swap_lock);
for (i = 0; i< MAX_SWAPFILES; i++)
if (swapfile_used[i] == SWAPFILE_IGNORED) {
- swap_info[i].flags ^= 0xFF;
+ swap_info[i].flags ^= SWP_WRITEOK;
}
- swap_list_unlock();
+ spin_unlock(&swap_lock);
}
/**
@@ -285,6 +424,10 @@ static int data_write(void)
int error = 0, i = 0;
unsigned int mod = nr_copy_pages / 100;
struct pbe *p;
+ void *tfm;
+
+ if ((error = crypto_init(1, &tfm)))
+ return error;
if (!mod)
mod = 1;
@@ -293,11 +436,14 @@ static int data_write(void)
for_each_pbe (p, pagedir_nosave) {
if (!(i%mod))
printk( "\b\b\b\b%3d%%", i / mod );
- if ((error = write_page(p->address, &(p->swap_address))))
+ if ((error = crypto_write(p, tfm))) {
+ crypto_exit(tfm);
return error;
+ }
i++;
}
printk("\b\b\b\bdone\n");
+ crypto_exit(tfm);
return error;
}
@@ -384,7 +530,6 @@ static int write_pagedir(void)
* write_suspend_image - Write entire image and metadata.
*
*/
-
static int write_suspend_image(void)
{
int error;
@@ -399,6 +544,7 @@ static int write_suspend_image(void)
if ((error = close_swap()))
goto FreePagedir;
Done:
+ memset(key_iv, 0, MAXKEY+MAXIV);
return error;
FreePagedir:
free_pagedir_entries();
@@ -590,18 +736,7 @@ static void copy_data_pages(void)
static int calc_nr(int nr_copy)
{
- int extra = 0;
- int mod = !!(nr_copy % PBES_PER_PAGE);
- int diff = (nr_copy / PBES_PER_PAGE) + mod;
-
- do {
- extra += diff;
- nr_copy += diff;
- mod = !!(nr_copy % PBES_PER_PAGE);
- diff = (nr_copy / PBES_PER_PAGE) + mod - extra;
- } while (diff > 0);
-
- return nr_copy;
+ return nr_copy + (nr_copy+PBES_PER_PAGE-2)/(PBES_PER_PAGE-1);
}
/**
@@ -885,20 +1020,21 @@ int swsusp_suspend(void)
* at resume time, and evil weirdness ensues.
*/
if ((error = device_power_down(PMSG_FREEZE))) {
+ printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
local_irq_enable();
return error;
}
if ((error = swsusp_swap_check())) {
- printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try "
- "swapon -a!\n");
+ printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n");
+ device_power_up();
local_irq_enable();
return error;
}
save_processor_state();
if ((error = swsusp_arch_suspend()))
- printk("Error %d suspending\n", error);
+ printk(KERN_ERR "Error %d suspending\n", error);
/* Restore control flow magically appears here */
restore_processor_state();
BUG_ON (nr_copy_pages_check != nr_copy_pages);
@@ -1178,7 +1314,8 @@ static const char * sanity_check(void)
if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
return "machine";
#if 0
- if(swsusp_info.cpus != num_online_cpus())
+ /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */
+ if (swsusp_info.cpus != num_possible_cpus())
return "number of cpus";
#endif
return NULL;
@@ -1211,13 +1348,14 @@ static int check_sig(void)
return error;
if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
+ memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV);
+ memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV);
/*
* Reset swap signature now.
*/
error = bio_write_page(0, &swsusp_header);
} else {
- printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
return -EINVAL;
}
if (!error)
@@ -1238,6 +1376,10 @@ static int data_read(struct pbe *pblist)
int error = 0;
int i = 0;
int mod = swsusp_info.image_pages / 100;
+ void *tfm;
+
+ if ((error = crypto_init(0, &tfm)))
+ return error;
if (!mod)
mod = 1;
@@ -1249,19 +1391,18 @@ static int data_read(struct pbe *pblist)
if (!(i % mod))
printk("\b\b\b\b%3d%%", i / mod);
- error = bio_read_page(swp_offset(p->swap_address),
- (void *)p->address);
- if (error)
+ if ((error = crypto_read(p, tfm))) {
+ crypto_exit(tfm);
return error;
+ }
i++;
}
printk("\b\b\b\bdone\n");
+ crypto_exit(tfm);
return error;
}
-extern dev_t name_to_dev_t(const char *line);
-
/**
* read_pagedir - Read page backup list pages from swap
*/
@@ -1386,6 +1527,7 @@ int swsusp_read(void)
error = read_suspend_image();
blkdev_put(resume_bdev);
+ memset(key_iv, 0, MAXKEY+MAXIV);
if (!error)
pr_debug("swsusp: Reading resume file was successful\n");
diff --git a/kernel/sched.c b/kernel/sched.c
index 4107db0dc091..5f889d0cbfcc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3378,8 +3378,8 @@ EXPORT_SYMBOL(set_user_nice);
*/
int can_nice(const task_t *p, const int nice)
{
- /* convert nice value [19,-20] to rlimit style value [0,39] */
- int nice_rlim = 19 - nice;
+ /* convert nice value [19,-20] to rlimit style value [1,40] */
+ int nice_rlim = 20 - nice;
return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
capable(CAP_SYS_NICE));
}
@@ -3486,7 +3486,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
p->policy = policy;
p->rt_priority = prio;
if (policy != SCHED_NORMAL)
- p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
+ p->prio = MAX_RT_PRIO-1 - p->rt_priority;
else
p->prio = p->static_prio;
}
@@ -3518,7 +3518,8 @@ recheck:
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
if (param->sched_priority < 0 ||
- param->sched_priority > MAX_USER_RT_PRIO-1)
+ (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
+ (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
return -EINVAL;
@@ -3528,7 +3529,8 @@ recheck:
*/
if (!capable(CAP_SYS_NICE)) {
/* can't change policy */
- if (policy != p->policy)
+ if (policy != p->policy &&
+ !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
return -EPERM;
/* can't increase priority */
if (policy != SCHED_NORMAL &&
diff --git a/kernel/signal.c b/kernel/signal.c
index ca1186eef938..d282fea81138 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -692,7 +692,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
{
struct task_struct *t;
- if (p->flags & SIGNAL_GROUP_EXIT)
+ if (p->signal->flags & SIGNAL_GROUP_EXIT)
/*
* The process is in the middle of dying already.
*/
diff --git a/kernel/sys.c b/kernel/sys.c
index 9a24374c23bc..0bcaed6560ac 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -361,6 +361,64 @@ out_unlock:
return retval;
}
+void emergency_restart(void)
+{
+ machine_emergency_restart();
+}
+EXPORT_SYMBOL_GPL(emergency_restart);
+
+void kernel_restart(char *cmd)
+{
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
+ system_state = SYSTEM_RESTART;
+ device_shutdown();
+ if (!cmd) {
+ printk(KERN_EMERG "Restarting system.\n");
+ } else {
+ printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
+ }
+ printk(".\n");
+ machine_restart(cmd);
+}
+EXPORT_SYMBOL_GPL(kernel_restart);
+
+void kernel_kexec(void)
+{
+#ifdef CONFIG_KEXEC
+ struct kimage *image;
+ image = xchg(&kexec_image, 0);
+ if (!image) {
+ return;
+ }
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ system_state = SYSTEM_RESTART;
+ device_shutdown();
+ printk(KERN_EMERG "Starting new kernel\n");
+ machine_shutdown();
+ machine_kexec(image);
+#endif
+}
+EXPORT_SYMBOL_GPL(kernel_kexec);
+
+void kernel_halt(void)
+{
+ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
+ system_state = SYSTEM_HALT;
+ device_shutdown();
+ printk(KERN_EMERG "System halted.\n");
+ machine_halt();
+}
+EXPORT_SYMBOL_GPL(kernel_halt);
+
+void kernel_power_off(void)
+{
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
+ system_state = SYSTEM_POWER_OFF;
+ device_shutdown();
+ printk(KERN_EMERG "Power down.\n");
+ machine_power_off();
+}
+EXPORT_SYMBOL_GPL(kernel_power_off);
/*
* Reboot system call: for obvious reasons only root may call it,
@@ -389,11 +447,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
lock_kernel();
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
- notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
- system_state = SYSTEM_RESTART;
- device_shutdown();
- printk(KERN_EMERG "Restarting system.\n");
- machine_restart(NULL);
+ kernel_restart(NULL);
break;
case LINUX_REBOOT_CMD_CAD_ON:
@@ -405,23 +459,13 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
break;
case LINUX_REBOOT_CMD_HALT:
- notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
- system_state = SYSTEM_HALT;
- device_suspend(PMSG_SUSPEND);
- device_shutdown();
- printk(KERN_EMERG "System halted.\n");
- machine_halt();
+ kernel_halt();
unlock_kernel();
do_exit(0);
break;
case LINUX_REBOOT_CMD_POWER_OFF:
- notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
- system_state = SYSTEM_POWER_OFF;
- device_suspend(PMSG_SUSPEND);
- device_shutdown();
- printk(KERN_EMERG "Power down.\n");
- machine_power_off();
+ kernel_power_off();
unlock_kernel();
do_exit(0);
break;
@@ -433,32 +477,14 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
}
buffer[sizeof(buffer) - 1] = '\0';
- notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
- system_state = SYSTEM_RESTART;
- device_suspend(PMSG_FREEZE);
- device_shutdown();
- printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
- machine_restart(buffer);
+ kernel_restart(buffer);
break;
-#ifdef CONFIG_KEXEC
case LINUX_REBOOT_CMD_KEXEC:
- {
- struct kimage *image;
- image = xchg(&kexec_image, 0);
- if (!image) {
- unlock_kernel();
- return -EINVAL;
- }
- notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
- system_state = SYSTEM_RESTART;
- device_shutdown();
- printk(KERN_EMERG "Starting new kernel\n");
- machine_shutdown();
- machine_kexec(image);
- break;
- }
-#endif
+ kernel_kexec();
+ unlock_kernel();
+ return -EINVAL;
+
#ifdef CONFIG_SOFTWARE_SUSPEND
case LINUX_REBOOT_CMD_SW_SUSPEND:
{
@@ -478,8 +504,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
static void deferred_cad(void *dummy)
{
- notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
- machine_restart(NULL);
+ kernel_restart(NULL);
}
/*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 29196ce9b40f..1ab2370e2efa 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -79,7 +79,9 @@ cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
-cond_syscall(sys_set_zone_reclaim);
+cond_syscall(sys_inotify_init);
+cond_syscall(sys_inotify_add_watch);
+cond_syscall(sys_inotify_rm_watch);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 270ee7fadbd8..8e56e2495542 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -31,6 +31,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/net.h>
#include <linux/sysrq.h>
#include <linux/highuid.h>
#include <linux/writeback.h>
@@ -114,6 +115,7 @@ extern int unaligned_enabled;
extern int sysctl_ieee_emulation_warnings;
#endif
extern int sysctl_userprocess_debug;
+extern int spin_retry;
#endif
extern int sysctl_hz_timer;
@@ -135,9 +137,6 @@ static struct ctl_table_header root_table_header =
static ctl_table kern_table[];
static ctl_table vm_table[];
-#ifdef CONFIG_NET
-extern ctl_table net_table[];
-#endif
static ctl_table proc_table[];
static ctl_table fs_table[];
static ctl_table debug_table[];
@@ -146,6 +145,9 @@ extern ctl_table random_table[];
#ifdef CONFIG_UNIX98_PTYS
extern ctl_table pty_table[];
#endif
+#ifdef CONFIG_INOTIFY
+extern ctl_table inotify_table[];
+#endif
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
int sysctl_legacy_va_layout;
@@ -218,6 +220,7 @@ static ctl_table root_table[] = {
.mode = 0555,
.child = dev_table,
},
+
{ .ctl_name = 0 }
};
@@ -643,7 +646,16 @@ static ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-
+#if defined(CONFIG_ARCH_S390)
+ {
+ .ctl_name = KERN_SPIN_RETRY,
+ .procname = "spin_retry",
+ .data = &spin_retry,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{ .ctl_name = 0 }
};
@@ -950,6 +962,14 @@ static ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#ifdef CONFIG_INOTIFY
+ {
+ .ctl_name = FS_INOTIFY,
+ .procname = "inotify",
+ .mode = 0555,
+ .child = inotify_table,
+ },
+#endif
#endif
{
.ctl_name = KERN_SETUID_DUMPABLE,
@@ -968,7 +988,7 @@ static ctl_table debug_table[] = {
static ctl_table dev_table[] = {
{ .ctl_name = 0 }
-};
+};
extern void init_irq_proc (void);
diff --git a/kernel/time.c b/kernel/time.c
index d4335c1c884c..dd5ae1162a8f 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -128,7 +128,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __us
* as real UNIX machines always do it. This avoids all headaches about
* daylight saving times and warping kernel clocks.
*/
-inline static void warp_clock(void)
+static inline void warp_clock(void)
{
write_seqlock_irq(&xtime_lock);
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
diff --git a/kernel/timer.c b/kernel/timer.c
index f2a11887a726..5377f40723ff 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1023,7 +1023,7 @@ asmlinkage long sys_getppid(void)
parent = me->group_leader->real_parent;
for (;;) {
pid = parent->tgid;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
{
struct task_struct *old = parent;
diff --git a/kernel/user.c b/kernel/user.c
index 734575d55769..89e562feb1b1 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -120,6 +120,10 @@ struct user_struct * alloc_uid(uid_t uid)
atomic_set(&new->processes, 0);
atomic_set(&new->files, 0);
atomic_set(&new->sigpending, 0);
+#ifdef CONFIG_INOTIFY
+ atomic_set(&new->inotify_watches, 0);
+ atomic_set(&new->inotify_devs, 0);
+#endif
new->mq_bytes = 0;
new->locked_shm = 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 259cf55da3c9..c7e36d4a70ca 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -308,8 +308,6 @@ struct workqueue_struct *__create_workqueue(const char *name,
struct workqueue_struct *wq;
struct task_struct *p;
- BUG_ON(strlen(name) > 10);
-
wq = kmalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return NULL;