From a6fa8e5a6172a5a5bc06ed04f34e50b36c978127 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 30 Jan 2008 13:30:00 +0100 Subject: time: clean hungarian notation from timers Clean up hungarian notation from timer code. Signed-off-by: Pavel Machek Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 78cf899b4409..de0e71359ede 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -5,7 +5,7 @@ #include #include -struct tvec_t_base_s; +struct tvec_base; struct timer_list { struct list_head entry; @@ -14,7 +14,7 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; - struct tvec_t_base_s *base; + struct tvec_base *base; #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -22,7 +22,7 @@ struct timer_list { #endif }; -extern struct tvec_t_base_s boot_tvec_bases; +extern struct tvec_base boot_tvec_bases; #define TIMER_INITIALIZER(_function, _expires, _data) { \ .function = (_function), \ -- cgit v1.2.3 From 1d76c2622813fbc692b0d323028cfef9ee36051a Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 30 Jan 2008 13:30:01 +0100 Subject: clocksource: make CLOCKSOURCE_MASK bullet-proof Signed-off-by: Atsushi Nemoto Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 107787aacb64..07b42153de24 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -103,7 +103,7 @@ struct clocksource { #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 /* simplify initialization of mask field */ -#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL< Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: clocksource: add unregister function to disable unusable clocksources On x86 the PIT might become an unusable clocksource. Add an unregister function to provide a possibilty to remove the PIT from the list of available clock sources. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 07b42153de24..85778a4b1209 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* used to install a new clocksource */ extern int clocksource_register(struct clocksource*); +extern void clocksource_unregister(struct clocksource*); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); -- cgit v1.2.3 From e3f37a54f690d3e64995ea7ecea08c5ab3070faf Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: assign IRQs to HPET timers The userspace API for the HPET (see Documentation/hpet.txt) did not work. The HPET_IE_ON ioctl was failing as there was no IRQ assigned to the timer device. This patch fixes it by allocating IRQs to timer blocks in the HPET. arch/x86/kernel/hpet.c | 13 +++++-------- drivers/char/hpet.c | 45 ++++++++++++++++++++++++++++++++++++++------- include/linux/hpet.h | 2 +- 3 files changed, 44 insertions(+), 16 deletions(-) Signed-off-by: Balaji Rao Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/hpet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 707f7cb9e795..e3c0b2aa944c 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -64,7 +64,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v1.2.3 From 6378ddb592158db4b42197f1bc8666228800e379 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Wed, 30 Jan 2008 13:30:04 +0100 Subject: time: track accurate idle time with tick_sched.idle_sleeptime Current idle time in kstat is based on jiffies and is coarse grained. tick_sched.idle_sleeptime is making some attempt to keep track of idle time in a fine grained manner. But, it is not handling the time spent in interrupts fully. Make tick_sched.idle_sleeptime accurate with respect to time spent on handling interrupts and also add tick_sched.idle_lastupdate, which keeps track of last time when idle_sleeptime was updated. This statistics will be crucial for cpufreq-ondemand governor, which can shed some conservative gaurd band that is uses today while setting the frequency. The ondemand changes that uses the exact idle time is coming soon. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f4a1395e05ff..0fadf95debe1 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -51,8 +51,10 @@ struct tick_sched { unsigned long idle_jiffies; unsigned long idle_calls; unsigned long idle_sleeps; + int idle_active; ktime_t idle_entrytime; ktime_t idle_sleeptime; + ktime_t idle_lastupdate; ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; @@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(void); extern void tick_nohz_restart_sched_tick(void); extern void tick_nohz_update_jiffies(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern void tick_nohz_stop_idle(int cpu); +extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } @@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } +static inline void tick_nohz_stop_idle(int cpu) { } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } # endif /* !NO_HZ */ #endif -- cgit v1.2.3 From c202f298de59c17c0a9799dc0e1b9e0629347935 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:08 +0100 Subject: x86: clean up arch/x86/ia32/sys_ia32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 0e69d2cf14aa..ba29d4c59643 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -191,6 +191,10 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp); +asmlinkage long compat_sys_wait4(compat_pid_t pid, + compat_uint_t *stat_addr, int options, + struct compat_rusage *ru); + #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) \ -- cgit v1.2.3 From 70a20025632ca320316b5068326784d07c8ff351 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:18 +0100 Subject: x86: move pmtmr related declarations Move more stuff out of proto.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/acpi_pmtmr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 1d0ef1ae8036..7e3d2859be50 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -25,6 +25,8 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +extern void pmtimer_wait(unsigned); + #else static inline u32 acpi_pm_read_early(void) -- cgit v1.2.3 From 02456c708eab4515121e5e581422fa3be14368d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:27 +0100 Subject: x86: nuke a ton of dead hpet code No users, just ballast Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/hpet.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hpet.h b/include/linux/hpet.h index e3c0b2aa944c..9cd94bfd07e5 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -115,9 +115,6 @@ static inline void hpet_reserve_timer(struct hpet_data *hd, int timer) } int hpet_alloc(struct hpet_data *); -int hpet_register(struct hpet_task *, int); -int hpet_unregister(struct hpet_task *); -int hpet_control(struct hpet_task *, unsigned int, unsigned long); #endif /* __KERNEL__ */ -- cgit v1.2.3 From c9cce83dd1d59f52e2c8f8c7d265ba4854c40785 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 30 Jan 2008 13:30:32 +0100 Subject: x86: remove extern declarations for code, data, bss resources This patch removes the extern struct resource declarations for data_resource, code_resource and bss_resource on x86 and declares that three structures as static as done on other architectures like IA64. On i386, these structures are moved to setup_32.c (from e820_32.c) because that's code that is not specific to e820 and also required on EFI systems. That makes the "extern" reference superfluous. On x86_64, data_resource, code_resource and bss_resource are passed to e820_reserve_resources() as arguments just as done on i386 and IA64. That also avoids the "extern" reference and it's possible to make it static. Signed-off-by: Bernhard Walle Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ioport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6187a8567bc7..605d237364d2 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -8,6 +8,7 @@ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H +#ifndef __ASSEMBLY__ #include #include /* @@ -153,4 +154,5 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ -- cgit v1.2.3 From fb7fa8f1741c91f6c6e958762155abe9339476ca Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:47 +0100 Subject: ptrace: arch_has_single_step This defines the new macro arch_has_single_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. It declares the new user_enable_single_step and user_disable_single_step functions. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 3ea5750a0f7e..8f06c6fb22a5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -129,6 +129,52 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); #define force_successful_syscall_return() do { } while (0) #endif +/* + * should define the following things inside #ifdef __KERNEL__. + * + * These do-nothing inlines are used when the arch does not + * implement single-step. The kerneldoc comments are here + * to document the interface for all arch definitions. + */ + +#ifndef arch_has_single_step +/** + * arch_has_single_step - does this CPU support user-mode single-step? + * + * If this is defined, then there must be function declarations or + * inlines for user_enable_single_step() and user_disable_single_step(). + * arch_has_single_step() should evaluate to nonzero iff the machine + * supports instruction single-step for user mode. + * It can be a constant or it can test a CPU feature bit. + */ +#define arch_has_single_step() (0) + +/** + * user_enable_single_step - single-step in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_single_step() has returned nonzero. + * Set @task so that when it returns to user mode, it will trap after the + * next single instruction executes. + */ +static inline void user_enable_single_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} + +/** + * user_disable_single_step - cancel user-mode single-step + * @task: either current or a task stopped in %TASK_TRACED + * + * Clear @task of the effects of user_enable_single_step(). This can + * be called whether or not user_enable_single_step() was ever called + * on @task, and even if arch_has_single_step() returned zero. + */ +static inline void user_disable_single_step(struct task_struct *task) +{ +} +#endif /* arch_has_single_step */ + #endif #endif -- cgit v1.2.3 From dc802c2d2e66e2d1544e023bfd4be6cdee48d57b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:53 +0100 Subject: ptrace: arch_has_block_step This defines the new macro arch_has_block_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. This is the analog of arch_has_single_step() for step-until-branch on machines that have it. It declares the new user_enable_block_step function, which goes with the existing user_enable_single_step and user_disable_single_step. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 8f06c6fb22a5..1febc541dda5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -155,7 +155,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); * * This can only be called when arch_has_single_step() has returned nonzero. * Set @task so that when it returns to user mode, it will trap after the - * next single instruction executes. + * next single instruction executes. If arch_has_block_step() is defined, + * this must clear the effects of user_enable_block_step() too. */ static inline void user_enable_single_step(struct task_struct *task) { @@ -166,15 +167,43 @@ static inline void user_enable_single_step(struct task_struct *task) * user_disable_single_step - cancel user-mode single-step * @task: either current or a task stopped in %TASK_TRACED * - * Clear @task of the effects of user_enable_single_step(). This can - * be called whether or not user_enable_single_step() was ever called - * on @task, and even if arch_has_single_step() returned zero. + * Clear @task of the effects of user_enable_single_step() and + * user_enable_block_step(). This can be called whether or not either + * of those was ever called on @task, and even if arch_has_single_step() + * returned zero. */ static inline void user_disable_single_step(struct task_struct *task) { } #endif /* arch_has_single_step */ +#ifndef arch_has_block_step +/** + * arch_has_block_step - does this CPU support user-mode block-step? + * + * If this is defined, then there must be a function declaration or inline + * for user_enable_block_step(), and arch_has_single_step() must be defined + * too. arch_has_block_step() should evaluate to nonzero iff the machine + * supports step-until-branch for user mode. It can be a constant or it + * can test a CPU feature bit. + */ +#define arch_has_single_step() (0) + +/** + * user_enable_block_step - step until branch in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_block_step() has returned nonzero, + * and will never be called when single-instruction stepping is being used. + * Set @task so that when it returns to user mode, it will trap after the + * next branch or trap taken. + */ +static inline void user_enable_block_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} +#endif /* arch_has_block_step */ + #endif #endif -- cgit v1.2.3 From 5b88abbf770a0e1975c668743100f42934f385e8 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:53 +0100 Subject: ptrace: generic PTRACE_SINGLEBLOCK This makes ptrace_request handle PTRACE_SINGLEBLOCK along with PTRACE_CONT et al. The new generic code makes use of the arch_has_block_step macro and generic entry points on machines that define them. [ mingo@elte.hu: bugfix ] Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1febc541dda5..515bff053de8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -187,7 +187,7 @@ static inline void user_disable_single_step(struct task_struct *task) * supports step-until-branch for user mode. It can be a constant or it * can test a CPU feature bit. */ -#define arch_has_single_step() (0) +#define arch_has_block_step() (0) /** * user_enable_block_step - step until branch in user-mode task -- cgit v1.2.3 From 5548fecdff5617ba3a2f09f0e585e1ac6e1bd25c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:30:55 +0100 Subject: x86: clean up bitops-related warnings Add casts to appropriate places to silence spurious bitops warnings. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/thread_info.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9c4ad755d7e5..dfbdfb9836f4 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,27 +42,27 @@ extern long do_no_restart_syscall(struct restart_block *parm); static inline void set_ti_thread_flag(struct thread_info *ti, int flag) { - set_bit(flag,&ti->flags); + set_bit(flag, (unsigned long *)&ti->flags); } static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) { - clear_bit(flag,&ti->flags); + clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_set_bit(flag,&ti->flags); + return test_and_set_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_clear_bit(flag,&ti->flags); + return test_and_clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_ti_thread_flag(struct thread_info *ti, int flag) { - return test_bit(flag,&ti->flags); + return test_bit(flag, (unsigned long *)&ti->flags); } #define set_thread_flag(flag) \ -- cgit v1.2.3 From 2355188570790930718fb72444cddc2959039d9d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:31:10 +0100 Subject: x86: avoid build warning fix this build warning: include/asm/topology_32.h: In function 'node_to_first_cpu': include/asm/topology_32.h:66: warning: unused variable 'mask' Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 85bd790c201e..7047f58306a7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -218,8 +218,8 @@ int __first_cpu(const cpumask_t *srcp); int __next_cpu(int n, const cpumask_t *srcp); #define next_cpu(n, src) __next_cpu((n), &(src)) #else -#define first_cpu(src) 0 -#define next_cpu(n, src) 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) #endif #define cpumask_of_cpu(cpu) \ -- cgit v1.2.3 From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jan 2008 13:31:20 +0100 Subject: spinlock: lockbreak cleanup The break_lock data structure and code for spinlocks is quite nasty. Not only does it double the size of a spinlock but it changes locking to a potentially less optimal trylock. Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a __raw_spin_is_contended that uses the lock data itself to determine whether there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is not set. Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to decouple it from the spinlock implementation, and make it typesafe (rwlocks do not have any need_lockbreak sites -- why do they even get bloated up with that break_lock then?). Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 21 +++++++-------------- include/linux/spinlock.h | 6 ++++++ include/linux/spinlock_types.h | 4 ++-- include/linux/spinlock_up.h | 2 ++ 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0546e884ea..9d4797609aa5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void); /* * Does a critical section need to be broken due to another - * task waiting?: + * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define need_lockbreak(lock) ((lock)->break_lock) -#else -# define need_lockbreak(lock) 0 -#endif - -/* - * Does a critical section need to be broken due to another - * task waiting or preemption being signalled: - */ -static inline int lock_need_resched(spinlock_t *lock) +static inline int spin_needbreak(spinlock_t *lock) { - if (need_lockbreak(lock) || need_resched()) - return 1; +#ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +#else return 0; +#endif } /* diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index c376f3b36c89..124449733c55 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,6 +120,12 @@ do { \ #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) +#ifdef CONFIG_GENERIC_LOCKBREAK +#define spin_is_contended(lock) ((lock)->break_lock) +#else +#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index f6a3a951b79e..68d88f71f1a2 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,7 +19,7 @@ typedef struct { raw_spinlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -35,7 +35,7 @@ typedef struct { typedef struct { raw_rwlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index ea54c4c9a4ec..938234c4a996 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ +#define __raw_spin_is_contended(lock) (((void)(lock), 0)) + #define __raw_read_can_lock(lock) (((void)(lock), 1)) #define __raw_write_can_lock(lock) (((void)(lock), 1)) -- cgit v1.2.3 From cae4595764cb3b08f6517e99bac1e3862854b1a1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 30 Jan 2008 13:31:23 +0100 Subject: x86: make __{save,restore}_processor_state static .. allowing to remove their declarations from a global include file (the symbols don't exist for anything but x86). Likewise for 64-bits' fix_processor_context(), just that that one was properly declared in an arch-specific header. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/suspend.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4360e0816956..40280df2a3db 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -211,9 +211,6 @@ static inline int hibernate(void) { return -ENOSYS; } #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); -struct saved_context; -void __save_processor_state(struct saved_context *ctxt); -void __restore_processor_state(struct saved_context *ctxt); /* kernel/power/main.c */ extern struct blocking_notifier_head pm_chain_head; -- cgit v1.2.3 From bdf88217b70dbb18c4ee27a6c497286e040a6705 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:44 +0100 Subject: x86: user_regset header The new header defines the types struct user_regset and struct user_regset_view, with some associated declarations. This new set of interfaces will become the standard way for arch code to expose user-mode machine-specific state. A single set of entry points into arch code can do all the low-level work in one place to fill the needs of core dumps, ptrace, and any other user-mode debugging facilities that might come along in the future. For existing arch code to adapt to the user_regset interfaces, each arch can work from the code it already has to support core files and ptrace. The formats you want for user_regset are the core file formats. The only wrinkle in adapting old ptrace implementation code as user_regset get and set functions is that these functions can be called on current as well as on another task_struct that is stopped and switched out as for ptrace. For some kinds of machine state, you may have to load it directly from CPU registers or otherwise differently for current than for another thread. (Your core dump support already handles this in elf_core_copy_regs for current and elf_core_copy_task_regs for other tasks, so just check there.) The set function should also be made to work on current in case that entails some special cases, though this was never required before for ptrace. Adding this flexibility covers the arch needs to open the door to more sophisticated new debugging facilities that don't always need to context-switch to do every little thing. The copyin/copyout helper functions (in a later patch) relieve the arch code of most of the cumbersome details of the flexible get/set interfaces. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 206 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/regset.h (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h new file mode 100644 index 000000000000..85d0fb0a014d --- /dev/null +++ b/include/linux/regset.h @@ -0,0 +1,206 @@ +/* + * User-mode machine state access + * + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + */ + +#ifndef _LINUX_REGSET_H +#define _LINUX_REGSET_H 1 + +#include +#include +struct task_struct; +struct user_regset; + + +/** + * user_regset_active_fn - type of @active function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * + * Return -%ENODEV if not available on the hardware found. + * Return %0 if no interesting state in this thread. + * Return >%0 number of @size units of interesting state. + * Any get call fetching state beyond that number will + * see the default initialization state for this data, + * so a caller that knows what the default state is need + * not copy it all out. + * This call is optional; the pointer is %NULL if there + * is no inexpensive check to yield a value < @n. + */ +typedef int user_regset_active_fn(struct task_struct *target, + const struct user_regset *regset); + +/** + * user_regset_get_fn - type of @get function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy into + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy into + * + * Fetch register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. + */ +typedef int user_regset_get_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); + +/** + * user_regset_set_fn - type of @set function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy from + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy from + * + * Store register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. + */ +typedef int user_regset_set_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/** + * user_regset_writeback_fn - type of @writeback function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @immediate: zero if writeback at completion of next context switch is OK + * + * This call is optional; usually the pointer is %NULL. When + * provided, there is some user memory associated with this regset's + * hardware, such as memory backing cached register data on register + * window machines; the regset's data controls what user memory is + * used (e.g. via the stack pointer value). + * + * Write register data back to user memory. If the @immediate flag + * is nonzero, it must be written to the user memory so uaccess or + * access_process_vm() can see it when this call returns; if zero, + * then it must be written back by the time the task completes a + * context switch (as synchronized with wait_task_inactive()). + * Return %0 on success or if there was nothing to do, -%EFAULT for + * a memory problem (bad stack pointer or whatever), or -%EIO for a + * hardware problem. + */ +typedef int user_regset_writeback_fn(struct task_struct *target, + const struct user_regset *regset, + int immediate); + +/** + * struct user_regset - accessible thread CPU state + * @n: Number of slots (registers). + * @size: Size in bytes of a slot (register). + * @align: Required alignment, in bytes. + * @bias: Bias from natural indexing. + * @core_note_type: ELF note @n_type value used in core dumps. + * @get: Function to fetch values. + * @set: Function to store values. + * @active: Function to report if regset is active, or %NULL. + * @writeback: Function to write data back to user memory, or %NULL. + * + * This data structure describes a machine resource we call a register set. + * This is part of the state of an individual thread, not necessarily + * actual CPU registers per se. A register set consists of a number of + * similar slots, given by @n. Each slot is @size bytes, and aligned to + * @align bytes (which is at least @size). + * + * These functions must be called only on the current thread or on a + * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are + * guaranteed will not be woken up and return to user mode, and that we + * have called wait_task_inactive() on. (The target thread always might + * wake up for SIGKILL while these functions are working, in which case + * that thread's user_regset state might be scrambled.) + * + * The @pos argument must be aligned according to @align; the @count + * argument must be a multiple of @size. These functions are not + * responsible for checking for invalid arguments. + * + * When there is a natural value to use as an index, @bias gives the + * difference between the natural index and the slot index for the + * register set. For example, x86 GDT segment descriptors form a regset; + * the segment selector produces a natural index, but only a subset of + * that index space is available as a regset (the TLS slots); subtracting + * @bias from a segment selector index value computes the regset slot. + * + * If nonzero, @core_note_type gives the n_type field (NT_* value) + * of the core file note in which this regset's data appears. + * NT_PRSTATUS is a special case in that the regset data starts at + * offsetof(struct elf_prstatus, pr_reg) into the note data; that is + * part of the per-machine ELF formats userland knows about. In + * other cases, the core file note contains exactly the whole regset + * (@n * @size) and nothing else. The core file note is normally + * omitted when there is an @active function and it returns zero. + */ +struct user_regset { + user_regset_get_fn *get; + user_regset_set_fn *set; + user_regset_active_fn *active; + user_regset_writeback_fn *writeback; + unsigned int n; + unsigned int size; + unsigned int align; + unsigned int bias; + unsigned int core_note_type; +}; + +/** + * struct user_regset_view - available regsets + * @name: Identifier, e.g. UTS_MACHINE string. + * @regsets: Array of @n regsets available in this view. + * @n: Number of elements in @regsets. + * @e_machine: ELF header @e_machine %EM_* value written in core dumps. + * @e_flags: ELF header @e_flags value written in core dumps. + * @ei_osabi: ELF header @e_ident[%EI_OSABI] value written in core dumps. + * + * A regset view is a collection of regsets (&struct user_regset, + * above). This describes all the state of a thread that can be seen + * from a given architecture/ABI environment. More than one view might + * refer to the same &struct user_regset, or more than one regset + * might refer to the same machine-specific state in the thread. For + * example, a 32-bit thread's state could be examined from the 32-bit + * view or from the 64-bit view. Either method reaches the same thread + * register state, doing appropriate widening or truncation. + */ +struct user_regset_view { + const char *name; + const struct user_regset *regsets; + unsigned int n; + u32 e_flags; + u16 e_machine; + u8 ei_osabi; +}; + +/* + * This is documented here rather than at the definition sites because its + * implementation is machine-dependent but its interface is universal. + */ +/** + * task_user_regset_view - Return the process's native regset view. + * @tsk: a thread of the process in question + * + * Return the &struct user_regset_view that is native for the given process. + * For example, what it would access when it called ptrace(). + * Throughout the life of the process, this only changes at exec. + */ +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); + + +#endif /* */ -- cgit v1.2.3 From bae3f7c39dee5951bcbedeaedb6744f882a00173 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:45 +0100 Subject: x86: user_regset helpers This adds some inlines to linux/regset.h intended for arch code to use in its user_regset get and set functions. These make it pretty easy to deal with the interface's optional kernel-space or user-space pointers and its generalized access to a part of the register data at a time. In simple cases where the internal data structure matches the exported layout (core dump format), a get function can be nothing but a call to user_regset_copyout, and a set function a call to user_regset_copyin. In other cases the exported layout is usually made up of a few pieces each stored contiguously in a different internal data structure. These helpers make it straightforward to write a get or set function by processing each contiguous chunk of the data in order. The start_pos and end_pos arguments are always constants, so these inlines collapse to a small amount of code. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index 85d0fb0a014d..761c931af975 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -15,6 +15,7 @@ #include #include +#include struct task_struct; struct user_regset; @@ -203,4 +204,119 @@ struct user_regset_view { const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); +/* + * These are helpers for writing regset get/set functions in arch code. + * Because @start_pos and @end_pos are always compile-time constants, + * these are inlined into very little code though they look large. + * + * Use one or more calls sequentially for each chunk of regset data stored + * contiguously in memory. Call with constants for @start_pos and @end_pos, + * giving the range of byte positions in the regset that data corresponds + * to; @end_pos can be -1 if this chunk is at the end of the regset layout. + * Each call updates the arguments to point past its chunk. + */ + +static inline int user_regset_copyout(unsigned int *pos, unsigned int *count, + void **kbuf, + void __user **ubuf, const void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(*kbuf, data, copy); + *kbuf += copy; + } else if (__copy_to_user(*ubuf, data, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, + const void **kbuf, + const void __user **ubuf, void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(data, *kbuf, copy); + *kbuf += copy; + } else if (__copy_from_user(data, *ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +/* + * These two parallel the two above, but for portions of a regset layout + * that always read as all-zero or for which writes are ignored. + */ +static inline int user_regset_copyout_zero(unsigned int *pos, + unsigned int *count, + void **kbuf, void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + if (*kbuf) { + memset(*kbuf, 0, copy); + *kbuf += copy; + } else if (__clear_user(*ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + if (*kbuf) + *kbuf += copy; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + + #endif /* */ -- cgit v1.2.3 From 5bde4d181793be84351bc21c256d8c71cfcd313a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:47 +0100 Subject: x86: user_regset user-copy helpers This defines two new inlines in linux/regset.h, for use in arch_ptrace implementations and the like. These provide simplified wrappers for using the user_regset interfaces to copy thread regset data into the caller's user-space memory. The inlines are trivial, but make the common uses in places such as ptrace implementation much more concise, easier to read, and less prone to code-copying errors. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index 761c931af975..8abee6556223 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -318,5 +318,51 @@ static inline int user_regset_copyin_ignore(unsigned int *pos, return 0; } +/** + * copy_regset_to_user - fetch a thread's user_regset data into user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy into + */ +static inline int copy_regset_to_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_WRITE, data, size)) + return -EIO; + + return regset->get(target, regset, offset, size, NULL, data); +} + +/** + * copy_regset_from_user - store into thread's user_regset data from user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy from + */ +static inline int copy_regset_from_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + const void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_READ, data, size)) + return -EIO; + + return regset->set(target, regset, offset, size, NULL, data); +} + #endif /* */ -- cgit v1.2.3 From 032d82d9065dec0e26718eca376c2029e4bd0595 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:47 +0100 Subject: x86: compat_ptrace_request This adds a compat_ptrace_request that is the analogue of ptrace_request for the things that 32-on-64 ptrace implementations can share in common. So far there are just a couple of requests handled generically. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ba29d4c59643..a907fbede6c3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -243,6 +243,10 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, const compat_ulong_t __user *new_nodes); +extern int compat_ptrace_request(struct task_struct *child, + compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); + /* * epoll (fs/eventpoll.c) compat bits follow ... */ -- cgit v1.2.3 From c269f19617f508cc5c29c0b064c1a437d7011a46 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:48 +0100 Subject: x86: compat_sys_ptrace This adds a generic definition of compat_sys_ptrace that calls compat_arch_ptrace, parallel to sys_ptrace/arch_ptrace. Some machines needing this already define a function by that name. The new generic function is defined only on machines that put #define __ARCH_WANT_COMPAT_SYS_PTRACE into asm/ptrace.h. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/compat.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index a907fbede6c3..d38655f2be70 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -247,6 +247,13 @@ extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE +extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data); +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ + /* * epoll (fs/eventpoll.c) compat bits follow ... */ -- cgit v1.2.3 From bb61682b3f31dec7d058cae2f6edd2275248a704 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:56 +0100 Subject: x86: x86 core dump TLS This makes ELF core dumps of 32-bit processes include a new note type NT_386_TLS (0x200) giving the contents of the TLS slots in struct user_desc format. This lets post mortem examination figure out what the segment registers mean like the debugger does with get_thread_area on a live process. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 576e83bd6d88..7ceb24d87c1a 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -355,6 +355,7 @@ typedef struct elf64_shdr { #define NT_AUXV 6 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From 74ef649fe847fdfbd3e1732d21b923f59ca04e8c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:32:42 +0100 Subject: x86: add _AT() macro to conditionally cast # HG changeset patch # User Jeremy Fitzhardinge # Date 1199317452 28800 # Node ID f7e7db3facd9406545103164f9be8f9ba1a2b549 # Parent 4d9a413a0f4c1d98dbea704f0366457b5117045d x86: add _AT() macro to conditionally cast Define _AT(type, value) to conditionally cast a value when compiling C code, but not when used in assembler. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/const.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/const.h b/include/linux/const.h index 07b300bfe34b..c22c707c455d 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -7,13 +7,18 @@ * C code. Therefore we cannot annotate them always with * 'UL' and other type specifiers unilaterally. We * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. */ #ifdef __ASSEMBLY__ #define _AC(X,Y) X +#define _AT(T,X) X #else #define __AC(X,Y) (X##Y) #define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) #endif #endif /* !(_LINUX_CONST_H) */ -- cgit v1.2.3 From 5280e004fc22314122c84978c0b6a741cf96dc0f Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 30 Jan 2008 13:32:52 +0100 Subject: percpu: move arch XX_PER_CPU_XX definitions into linux/percpu.h - Special consideration for IA64: Add the ability to specify arch specific per cpu flags - remove .data.percpu attribute from DEFINE_PER_CPU for non-smp case. The arch definitions are all the same. So move them into linux/percpu.h. We cannot move DECLARE_PER_CPU since some include files just include asm/percpu.h to avoid include recursion problems. Cc: Rusty Russell Cc: Andi Kleen Signed-off-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 926adaae0f96..00412bb494c4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,6 +9,30 @@ #include +#ifndef PER_CPU_ATTRIBUTES +#define PER_CPU_ATTRIBUTES +#endif + +#ifdef CONFIG_SMP +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp +#else +#define DEFINE_PER_CPU(type, name) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) +#endif + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM #ifdef CONFIG_MODULES -- cgit v1.2.3 From 8c1c9356429741a82ff176d0f3400fb9e06b2a30 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Wed, 30 Jan 2008 13:32:53 +0100 Subject: x86: kprobes: add kprobes smoke tests that run on boot Here is a quick and naive smoke test for kprobes. This is intended to just verify if some unrelated change broke the *probes subsystem. It is self contained, architecture agnostic and isn't of any great use by itself. This needs to be built in the kernel and runs a basic set of tests to verify if kprobes, jprobes and kretprobes run fine on the kernel. In case of an error, it'll print out a message with a "BUG" prefix. This is a start; we intend to add more tests to this bucket over time. Thanks to Jim Keniston and Masami Hiramatsu for comments and suggestions. Tested on x86 (32/64) and powerpc. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 81891581e89b..6168c0a44172 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,15 @@ static inline void kretprobe_assert(struct kretprobe_instance *ri, } } +#ifdef CONFIG_KPROBES_SANITY_TEST +extern int init_test_probes(void); +#else +static inline int init_test_probes(void) +{ + return 0; +} +#endif /* CONFIG_KPROBES_SANITY_TEST */ + extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); @@ -227,6 +236,7 @@ void unregister_kretprobe(struct kretprobe *rp); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); + #else /* CONFIG_KPROBES */ #define __kprobes /**/ -- cgit v1.2.3 From d50efc6c40620b2e11648cac64ebf4a824e40382 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:00 +0100 Subject: x86: fix UML and -regparm=3 introduce the "asmregparm" calling convention: for functions implemented in assembly with a fixed regparm input parameters calling convention. mark the semaphore and rwsem slowpath functions with that. Signed-off-by: Ingo Molnar Signed-off-by: Miklos Szeredi Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index ff203dd02919..ceeeb5ea5b15 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -13,6 +13,10 @@ #define asmlinkage CPP_ASMLINKAGE #endif +#ifndef asmregparm +# define asmregparm +#endif + #ifndef prevent_tail_call # define prevent_tail_call(ret) do { } while (0) #endif -- cgit v1.2.3 From 076f9776f5d8d131b36955db8641aba3893c2c1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:06 +0100 Subject: x86: make early printk selectable on 64-bit as well Enable CONFIG_EMBEDDED to select CONFIG_EARLY_PRINTK on 64-bit as well. saves ~2K: text data bss dec hex filename 7290283 3672091 1907848 12870222 c4624e vmlinux.before 7288373 3671795 1907848 12868016 c459b0 vmlinux.after Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a7283c9beadf..ff356b2ee478 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -194,6 +194,9 @@ static inline int log_buf_read(int idx) { return 0; } static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } #endif +extern void __attribute__((format(printf, 1, 2))) + early_printk(const char *fmt, ...); + unsigned long int_sqrt(unsigned long); extern int printk_ratelimit(void); -- cgit v1.2.3 From 6b8be6df7f971919622d152d144c8798ad7fd160 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Wed, 30 Jan 2008 13:33:13 +0100 Subject: x86: add ENDPROC() markers The ENDPROCs() were not used everywhere. Some code used just END() instead, while other code used nothing. um/sys-i386/checksum.S didn't #include . I also got confused because gcc puts the .type near the ENTRY, while ENDPROC puts it on the opposite end. Signed off by: John Reiser Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index ceeeb5ea5b15..3faf599ea58e 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -57,6 +57,10 @@ .size name, .-name #endif +/* If symbol 'name' is treated as a subroutine (gets called, and returns) + * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of + * static analysis tools such as stack depth analyzer. + */ #ifndef ENDPROC #define ENDPROC(name) \ .type name, @function; \ -- cgit v1.2.3 From ca74a6f84e68b44867022f4a4f3ec17c087c864e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:17 +0100 Subject: x86: optimize lock prefix switching to run less frequently On VMs implemented using JITs that cache translated code changing the lock prefixes is a quite costly operation that forces the JIT to throw away and retranslate a lot of code. Previously a SMP kernel would rewrite the locks once for each CPU which is quite unnecessary. This patch changes the code to never switch at boot in the normal case (SMP kernel booting with >1 CPU) or only once for SMP kernel on UP. This makes a significant difference in boot up performance on AMD SimNow! Also I expect it to be a little faster on native systems too because a smp switch does a lot of text_poke()s which each synchronize the pipeline. v1->v2: Rename max_cpus v1->v2: Fix off by one in UP check (Thomas Gleixner) Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index c25e66bcecf3..55232ccf9cfd 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -78,6 +78,8 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); */ void smp_prepare_boot_cpu(void); +extern unsigned int setup_max_cpus; + #else /* !SMP */ /* -- cgit v1.2.3 From 03252919b79891063cf99145612360efbdf9500b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:18 +0100 Subject: x86: print which shared library/executable faulted in segfault etc. messages v3 They now look like: hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in libacl.so.1.1.0[2b9c8caea000+6000] This makes it easier to pinpoint bugs to specific libraries. And printing the offset into a mapping also always allows to find the correct fault point in a library even with randomized mappings. Previously there was no way to actually find the correct code address inside the randomized mapping. Relies on earlier patch to shorten the printk formats. They are often now longer than 80 characters, but I think that's worth it. [includes fix from Eric Dumazet to check d_path error value] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1897ca223eca..3c22d971afa7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1146,6 +1146,7 @@ extern int randomize_va_space; #endif const char * arch_vma_name(struct vm_area_struct *vma); +void print_vma_addr(char *prefix, unsigned long rip); struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -- cgit v1.2.3 From 0acf8e3447b893ff921863c2a4258e210d584452 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Jan 2008 13:33:36 +0100 Subject: pci: add PCI identifiers for the RDC devices This patch defines the PCI identifiers found in the RDC R-321x System-on-Chip. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c69531348363..41f6f28690f6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2085,6 +2085,13 @@ #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f +#define PCI_VENDOR_ID_RDC 0x17f3 +#define PCI_DEVICE_ID_RDC_R6020 0x6020 +#define PCI_DEVICE_ID_RDC_R6030 0x6030 +#define PCI_DEVICE_ID_RDC_R6040 0x6040 +#define PCI_DEVICE_ID_RDC_R6060 0x6060 +#define PCI_DEVICE_ID_RDC_R6061 0x6061 + #define PCI_VENDOR_ID_LENOVO 0x17aa #define PCI_VENDOR_ID_ARECA 0x17d3 -- cgit v1.2.3 From a5a19c63f4e55e32dc0bc3d936d7f94793d8b380 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:33:39 +0100 Subject: x86: demacro asm-x86/pgalloc_32.h Convert macros into inline functions, for better type-checking. This patch required a little bit of fiddling with headers in order to make __(pte|pmd)_free_tlb inline rather than macros. asm-generic/tlb.h includes asm/pgalloc.h, though it doesn't directly use any pgalloc definitions. I removed this include to avoid an include cycle, but it may cause secondary compile failures by things depending on the indirect inclusion; arch/x86/mm/hugetlbpage.c was one such place; there may be others. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4f3838adbb30..2c3ce4c69b25 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 12d6f21eacc21d84a809829543f2fe45c7e37319 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:58 +0100 Subject: x86: do not PSE on CONFIG_DEBUG_PAGEALLOC=y get more testing of the c_p_a() code done by not turning off PSE on DEBUG_PAGEALLOC. this simplifies the early pagetable setup code, and tests the largepage-splitup code quite heavily. In the end, all the largepages will be split up pretty quickly, so there's no difference to how DEBUG_PAGEALLOC worked before. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c22d971afa7..1bba6789a50a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ -#ifndef CONFIG_DEBUG_PAGEALLOC +#ifdef CONFIG_DEBUG_PAGEALLOC +extern int debug_pagealloc_enabled; + +extern void kernel_map_pages(struct page *page, int numpages, int enable); + +static inline void enable_debug_pagealloc(void) +{ + debug_pagealloc_enabled = 1; +} +#else static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} +static inline void enable_debug_pagealloc(void) +{ +} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); -- cgit v1.2.3 From f212ec4b7b4d84290f12c9c0416cdea283bf5f40 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 30 Jan 2008 13:34:11 +0100 Subject: x86: early boot debugging via FireWire (ohci1394_dma=early) This patch adds a new configuration option, which adds support for a new early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch() to decide wether OHCI-1394 FireWire controllers should be initialized and enabled for physical DMA access to allow remote debugging of early problems like issues ACPI or other subsystems which are executed very early. If the config option is not enabled, no code is changed, and if the boot paramenter is not given, no new code is executed, and independent of that, all new code is freed after boot, so the config option can be even enabled in standard, non-debug kernels. With specialized tools, it is then possible to get debugging information from machines which have no serial ports (notebooks) such as the printk buffer contents, or any data which can be referenced from global pointers, if it is stored below the 4GB limit and even memory dumps of of the physical RAM region below the 4GB limit can be taken without any cooperation from the CPU of the host, so the machine can be crashed early, it does not matter. In the extreme, even kernel debuggers can be accessed in this way. I wrote a small kgdb module and an accompanying gdb stub for FireWire which allows to gdb to talk to kgdb using remote remory reads and writes over FireWire. An version of the gdb stub fore FireWire is able to read all global data from a system which is running a a normal kernel without any kernel debugger, without any interruption or support of the system's CPU. That way, e.g. the task struct and so on can be read and even manipulated when the physical DMA access is granted. A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt and I've put a copy online at ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt It also has links to all the tools which are available to make use of it another copy of it is online at: ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff Signed-Off-By: Bernhard Kaindl Tested-By: Thomas Renninger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/init_ohci1394_dma.h | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 include/linux/init_ohci1394_dma.h (limited to 'include/linux') diff --git a/include/linux/init_ohci1394_dma.h b/include/linux/init_ohci1394_dma.h new file mode 100644 index 000000000000..3c03a4bba5e4 --- /dev/null +++ b/include/linux/init_ohci1394_dma.h @@ -0,0 +1,4 @@ +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +extern int __initdata init_ohci1394_dma_early; +extern void __init init_ohci1394_dma_on_all_controllers(void); +#endif -- cgit v1.2.3