From bca73e4bf8563d83f7856164caa44d5f42e44cca Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 13 Nov 2005 16:06:25 -0800 Subject: [PATCH] move pm_register/etc. to CONFIG_PM_LEGACY, pm_legacy.h Since few people need the support anymore, this moves the legacy pm_xxx functions to CONFIG_PM_LEGACY, and include/linux/pm_legacy.h. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/Kconfig | 9 +++++++++ kernel/power/Makefile | 3 ++- kernel/power/pm.c | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 46a5e5acff97..5ec248cb7f4a 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -19,6 +19,15 @@ config PM will issue the hlt instruction if nothing is to be done, thereby sending the processor to sleep and saving power. +config PM_LEGACY + bool "Legacy Power Management API" + depends on PM + default y + ---help--- + Support for pm_register() and friends. + + If unsure, say Y. + config PM_DEBUG bool "Power Management Debug Support" depends on PM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c71eb4579c07..04be7d0d96a7 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,8 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o process.o console.o pm.o +obj-y := main.o process.o console.o +obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o obj-$(CONFIG_SUSPEND_SMP) += smp.o diff --git a/kernel/power/pm.c b/kernel/power/pm.c index 159149321b3c..33c508e857dd 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c @@ -23,6 +23,7 @@ #include #include #include +#include #include int pm_active; -- cgit v1.2.3 From 005f18dfd0ed86c39716277b61dfb4bd2af91059 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Nov 2005 16:06:33 -0800 Subject: [PATCH] fix task_struct leak in ptrace When ptrace_attach fails we need to drop the task_struct reference. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b88d4186cd7a..17ee7e5a3451 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -470,7 +470,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); - goto out; + goto out_put_task_struct; } ret = ptrace_check_attach(child, request == PTRACE_KILL); -- cgit v1.2.3 From 5563e77078d85c4f107a0a673500c43ce57cf702 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sun, 13 Nov 2005 16:06:35 -0800 Subject: [PATCH] cpuset: fix return without releasing semaphore It is wrong to acquire the semaphore and then return from cpuset_zone_allowed without releasing it. Signed-off-by: Bob Picco Acked-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 5a737ed9dac7..7430640f9816 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1809,11 +1809,12 @@ int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ return 0; + if (current->flags & PF_EXITING) /* Let dying task have memory */ + return 1; + /* Not hardwall and node outside mems_allowed: scan up cpusets */ down(&callback_sem); - if (current->flags & PF_EXITING) /* Let dying task have memory */ - return 1; task_lock(current); cs = nearest_exclusive_ancestor(current->cpuset); task_unlock(current); -- cgit v1.2.3 From a1261f54611ec4ad6a7ab7080f86747e3ac3685b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Nov 2005 16:06:55 -0800 Subject: [PATCH] m68k: introduce task_thread_info new helper - task_thread_info(task). On platforms that have thread_info allocated separately (i.e. in default case) it simply returns task->thread_info. m68k wants (and for good reasons) to embed its thread_info into task_struct. So it will (in later patch) have task_thread_info() of its own. For now we just add a macro for generic case and convert existing instances of its body in core kernel to uses of new macro. Obviously safe - all normal architectures get the same preprocessor output they used to get. Signed-off-by: Al Viro Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- kernel/fork.c | 4 ++-- kernel/sched.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 452a1d116178..ee515683b92d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -859,7 +859,7 @@ fastcall NORET_TYPE void do_exit(long code) if (group_dead && tsk->signal->leader) disassociate_ctty(1); - module_put(tsk->thread_info->exec_domain->module); + module_put(task_thread_info(tsk)->exec_domain->module); if (tsk->binfmt) module_put(tsk->binfmt->module); diff --git a/kernel/fork.c b/kernel/fork.c index 158710d22566..7ef352ce347b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -919,7 +919,7 @@ static task_t *copy_process(unsigned long clone_flags, if (nr_threads >= max_threads) goto bad_fork_cleanup_count; - if (!try_module_get(p->thread_info->exec_domain->module)) + if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) @@ -1180,7 +1180,7 @@ bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: - module_put(p->thread_info->exec_domain->module); + module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); diff --git a/kernel/sched.c b/kernel/sched.c index b6506671b2be..831f7e9d8f1c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1437,7 +1437,7 @@ void fastcall sched_fork(task_t *p, int clone_flags) #endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ - p->thread_info->preempt_count = 1; + task_thread_info(p)->preempt_count = 1; #endif /* * Share the timeslice between parent and child, thus the @@ -4410,9 +4410,9 @@ void __devinit init_idle(task_t *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) - idle->thread_info->preempt_count = (idle->lock_depth >= 0); + task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); #else - idle->thread_info->preempt_count = 0; + task_thread_info(idle)->preempt_count = 0; #endif } -- cgit v1.2.3 From 10ebffde3d3916026974352b7900e44afe2b243f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Nov 2005 16:06:56 -0800 Subject: [PATCH] m68k: introduce setup_thread_stack() and end_of_stack() encapsulates the rest of arch-dependent operations with thread_info access. Two new helpers - setup_thread_stack() and end_of_stack(). For normal case the former consists of copying thread_info of parent to new thread_info and the latter returns pointer immediately past the end of thread_info. Signed-off-by: Al Viro Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 3 +-- kernel/sched.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 7ef352ce347b..2c70c9cdf5dc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -171,10 +171,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; } - *ti = *orig->thread_info; *tsk = *orig; tsk->thread_info = ti; - ti->task = tsk; + setup_thread_stack(tsk, orig); /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); diff --git a/kernel/sched.c b/kernel/sched.c index 831f7e9d8f1c..6f46c94cc29e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4327,10 +4327,10 @@ static void show_task(task_t *p) #endif #ifdef CONFIG_DEBUG_STACK_USAGE { - unsigned long *n = (unsigned long *) (p->thread_info+1); + unsigned long *n = end_of_stack(p); while (!*n) n++; - free = (unsigned long) n - (unsigned long)(p->thread_info+1); + free = (unsigned long)n - (unsigned long)end_of_stack(p); } #endif printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); -- cgit v1.2.3 From b17b0421d70f5b85a791afe145a16d5ca5f849aa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 13 Nov 2005 16:07:14 -0800 Subject: [PATCH] signal handling: revert sigkill priority fix This patch reverts commit c33880aaddbbab1ccf36f4457ed1090621f2e39a since it's not needed anymore. As pointed out by Roland McGrath the real fix is to deliver all signals before returning to user space. See http://www.ussg.iu.edu/hypermail/linux/kernel/0509.2/0683.html A fix for s390 has been merged. Signed-off-by: Heiko Carstens Cc: Roland McGrath Cc: Ingo Molnar Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 80789a59b4db..d7611f189ef7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -513,16 +513,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, { int sig = 0; - /* SIGKILL must have priority, otherwise it is quite easy - * to create an unkillable process, sending sig < SIGKILL - * to self */ - if (unlikely(sigismember(&pending->signal, SIGKILL))) { - if (!sigismember(mask, SIGKILL)) - sig = SIGKILL; - } - - if (likely(!sig)) - sig = next_signal(pending, mask); + sig = next_signal(pending, mask); if (sig) { if (current->notifier) { if (sigismember(current->notifier_mask, sig)) { -- cgit v1.2.3 From dbdf65b1b7f8ec48bda1604cfea7ac09ce583d6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Nov 2005 16:07:22 -0800 Subject: [PATCH] rcutorture: renice to low priority Make the box usable for interactive work when running the RCU torture test, by renicing the RCU torture-test threads to +19 by default. Kthreads run at nice -5 by default. Signed-off-by: Ingo Molnar Acked-by: Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcutorture.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9b58f1eff3ca..eb6719c50b4e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -195,6 +195,8 @@ rcu_torture_writer(void *arg) static DEFINE_RCU_RANDOM(rand); VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); + set_user_nice(current, 19); + do { schedule_timeout_uninterruptible(1); if (rcu_batches_completed() == oldbatch) @@ -238,6 +240,8 @@ rcu_torture_reader(void *arg) int pipe_count; VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); + set_user_nice(current, 19); + do { rcu_read_lock(); completed = rcu_batches_completed(); -- cgit v1.2.3 From 4557398f8cbaf9f254cff747534b4724c7f75c4f Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Sun, 13 Nov 2005 16:07:30 -0800 Subject: [PATCH] stop_machine() vs. synchronous IPI send deadlock This fixes deadlock of stop_machine() vs. synchronous IPI send. The problem is that stop_machine() disables interrupts before disabling preemption on other CPUs. So if another CPU is preempted and then calls something like flush_tlb_all() it will deadlock with CPU doing stop_machine() and which can't process IPI due to disabled IRQs. I changed stop_machine() to do the same things exactly as it does on other CPUs, i.e. it should disable preemption first on _all_ CPUs including itself and only after that disable IRQs. Signed-off-by: Kirill Korotaev Cc: Rusty Russell Cc: "Andrey Savochkin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/stop_machine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 84a9d18aa8da..b3d4dc858e35 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -119,13 +119,12 @@ static int stop_machine(void) return ret; } - /* Don't schedule us away at this point, please. */ - local_irq_disable(); - /* Now they are all started, make them hold the CPUs, ready. */ + preempt_disable(); stopmachine_set_state(STOPMACHINE_PREPARE); /* Make them disable irqs. */ + local_irq_disable(); stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); return 0; @@ -135,6 +134,7 @@ static void restart_machine(void) { stopmachine_set_state(STOPMACHINE_EXIT); local_irq_enable(); + preempt_enable_no_resched(); } struct stop_machine_data -- cgit v1.2.3 From 20dcae32439384b6863c626bb3b2a09bed65b33e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Sun, 13 Nov 2005 16:07:33 -0800 Subject: [PATCH] aio: remove kioctx from mm_struct Sync iocbs have a life cycle that don't need a kioctx. Their retrying, if any, is done in the context of their owner who has allocated them on the stack. The sole user of a sync iocb's ctx reference was aio_complete() checking for an elevated iocb ref count that could never happen. No path which grabs an iocb ref has access to sync iocbs. If we were to implement sync iocb cancelation it would be done by the owner of the iocb using its on-stack reference. Removing this chunk from aio_complete allows us to remove the entire kioctx instance from mm_struct, reducing its size by a third. On a i386 testing box the slab size went from 768 to 504 bytes and from 5 to 8 per page. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 2c70c9cdf5dc..e0d0b77343f8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -323,7 +323,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm) spin_lock_init(&mm->page_table_lock); rwlock_init(&mm->ioctx_list_lock); mm->ioctx_list = NULL; - mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; -- cgit v1.2.3 From 3f39894d1b5c253b10fcb8fbbbcf65a330f6cdc7 Mon Sep 17 00:00:00 2001 From: George Anzinger Date: Sun, 13 Nov 2005 16:07:44 -0800 Subject: [PATCH] timespec: normalize off by one errors It would appear that the timespec normalize code has an off by one error. Found in three places. Thanks to Ben for spotting. Signed-off-by: George Anzinger Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/posix-timers.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ea55c7a1cd75..5870efb3e200 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -270,7 +270,7 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff) long sec = tp->tv_sec; long nsec = tp->tv_nsec + res - 1; - if (nsec > NSEC_PER_SEC) { + if (nsec >= NSEC_PER_SEC) { sec++; nsec -= NSEC_PER_SEC; } @@ -1209,13 +1209,9 @@ static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp) do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); - tp->tv_sec += wall_to_mono.tv_sec; - tp->tv_nsec += wall_to_mono.tv_nsec; + set_normalized_timespec(tp, tp->tv_sec + wall_to_mono.tv_sec, + tp->tv_nsec + wall_to_mono.tv_nsec); - if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; - } return 0; } -- cgit v1.2.3 From ddad86c2d6f660112c6ce8aabae6ffd346e25b9b Mon Sep 17 00:00:00 2001 From: Martin Waitz Date: Sun, 13 Nov 2005 16:08:14 -0800 Subject: [PATCH] DocBook: include printk documentation Add printk documentation to kernel-api. Signed-off-by: Martin Waitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index e9be027bc930..ac8a08f36207 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -491,7 +491,10 @@ __attribute__((weak)) unsigned long long printk_clock(void) return sched_clock(); } -/* +/** + * printk - print a kernel message + * @fmt: format string + * * This is printk. It can be called from any context. We want it to work. * * We try to grab the console_sem. If we succeed, it's easy - we log the output and @@ -503,6 +506,9 @@ __attribute__((weak)) unsigned long long printk_clock(void) * One effect of this deferred printing is that code which calls printk() and * then changes console_loglevel may break. This is because console_loglevel * is inspected when the actual printing occurs. + * + * See also: + * printf(3) */ asmlinkage int printk(const char *fmt, ...) @@ -655,6 +661,9 @@ static void call_console_drivers(unsigned long start, unsigned long end) /** * add_preferred_console - add a device to the list of preferred consoles. + * @name: device name + * @idx: device index + * @options: options for this console * * The last preferred console added will be used for kernel messages * and stdin/out/err for init. Normally this is used by console_setup @@ -764,7 +773,8 @@ void release_console_sem(void) } EXPORT_SYMBOL(release_console_sem); -/** console_conditional_schedule - yield the CPU if required +/** + * console_conditional_schedule - yield the CPU if required * * If the console code is currently allowed to sleep, and * if this CPU should yield the CPU to another task, do @@ -976,6 +986,8 @@ EXPORT_SYMBOL(unregister_console); /** * tty_write_message - write a message to a certain tty, not just the console. + * @tty: the destination tty_struct + * @msg: the message to write * * This is used for messages that need to be redirected to a specific tty. * We don't put it into the syslog queue right now maybe in the future if -- cgit v1.2.3