From 747ada36ee23225d81657e4d633ac93b8ccbea7d Mon Sep 17 00:00:00 2001 From: Olaf Dabrunz Date: Wed, 11 Jun 2008 16:35:13 +0200 Subject: pci: add PCI IDs for devices that need boot irq quirks Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 65953822c9cb..7f3f101e03c1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2235,6 +2235,10 @@ #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 #define PCI_DEVICE_ID_INTEL_PXH_1 0x032A #define PCI_DEVICE_ID_INTEL_PXHV 0x032C +#define PCI_DEVICE_ID_INTEL_80332_0 0x0330 +#define PCI_DEVICE_ID_INTEL_80332_1 0x0332 +#define PCI_DEVICE_ID_INTEL_80333_0 0x0370 +#define PCI_DEVICE_ID_INTEL_80333_1 0x0372 #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 @@ -2307,6 +2311,7 @@ #define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 #define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 #define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac #define PCI_DEVICE_ID_INTEL_82820_HB 0x2500 #define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501 #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 -- cgit v1.2.3 From e1d3a90846b40ad3160bf4b648d36c6badad39ac Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:17 +0200 Subject: pci, acpi: reroute PCI interrupt to legacy boot interrupt equivalent Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does during interrupt handling). On chipsets where this INTx generation cannot be disabled, we reroute the valid interrupts to their legacy equivalent to get rid of spurious interrupts that might otherwise bring down (vital) interrupt lines through spurious interrupt detection in note_interrupt(). 
This patch benefited from discussions with Alexander Graf, Torsten Duwe, Ihno Krumreich, Daniel Gollub, Hannes Reinecke. The conclusions we drew and the patch itself are the authors' responsibility alone. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d18b1dd49fab..6755cf5ac109 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -117,6 +117,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, }; +enum pci_irq_reroute_variant { + INTEL_IRQ_REROUTE_VARIANT = 1, + MAX_IRQ_REROUTE_VARIANTS = 3 +}; + typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, @@ -194,6 +199,7 @@ struct pci_dev { unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ + unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int is_managed:1; -- cgit v1.2.3 From bd8fbdee6562ee526f3c2582a3b373ef195015dd Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Wed, 3 Sep 2008 17:53:07 +0200 Subject: lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set This patch fixes compilation if CONFIG_TRACE_IRQFLAGS_SUPPORT is ever disabled (which is currently not allowed by Kconfig). Alternatively we could just remove the option altogether and the associated code paths. Since the compilation error has been in the tree for at least two years and no one noticed it, I guess we don't really have the need for CONFIG_TRACE_IRQFLAGS_SUPPORT=n. Boot tested on x86 UP. 
Signed-off-by: Rui Sousa Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 74bde13224c9..f2993512b3b5 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - #include +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,21 +84,20 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -# define raw_local_irq_disable() local_irq_disable() -# define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ +#define local_irq_disable() raw_local_irq_disable() +#define local_irq_enable() raw_local_irq_enable() +#define local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ } while (0) -# define raw_local_irq_restore(flags) \ +# define local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -124,6 +123,5 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From ab7476cf76e560f0efda2a631a70aabe93009025 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Aug 2008 15:29:38 -0700 Subject: debug: add notifier chain debugging, v2 - unbreak ia64 (and powerpc) where function pointers dont point at code but at data (reported by Tony Luck) [ mingo@elte.hu: various cleanups ] Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- 
include/linux/kernel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..4e1366b552ae 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -187,6 +187,9 @@ extern unsigned long long memparse(char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); +extern int func_ptr_is_kernel_text(void *ptr); +extern void *dereference_function_descriptor(void *ptr); + struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); -- cgit v1.2.3 From 76b189e91845eab3a9d52bb97f971d312d25652d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Sep 2008 09:57:35 +0200 Subject: lockdep: add might_lock() / might_lock_read() useful to establish a lock dependency in case the actual dependency is rare or hard to trigger. Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 331e5f1c2d8e..0aa657aa8a1e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -480,4 +480,22 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif +#ifdef CONFIG_PROVE_LOCKING +# define might_lock(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +# define might_lock_read(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +#else +# define might_lock(lock) do { } while (0) +# define 
might_lock_read(lock) do { } while (0) +#endif + #endif /* __LINUX_LOCKDEP_H */ -- cgit v1.2.3 From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 10 Sep 2008 13:37:17 +0200 Subject: x86: some lock annotations for user copy paths, v2 - introduce might_fault() - handle the atomic user copy paths correctly [ mingo@elte.hu: move might_sleep() outside of in_atomic(). ] Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..e580ec095765 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -140,6 +140,15 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void); +#else +static inline void might_fault(void) +{ + might_sleep(); +} +#endif + extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) 
-- cgit v1.2.3 From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Sep 2008 20:53:21 +0200 Subject: x86: some lock annotations for user copy paths, v3 - add annotation back to clear_user() - change probe_kernel_address() to _inatomic*() method Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index fec6decfb983..2062293e57e6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 53b9d87f41a3d8838210ad7cdef02d814817ce85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 11 Sep 2008 17:02:58 -0700 Subject: lock debug: sit tight when we are already in a panic in: > http://bugzilla.kernel.org/show_bug.cgi?id=11543 The panic code called the kexec code which called mutex_trylock() which called spin_lock_mutex() which then stupidly went and blurted a load of debug stuff because of in_interrupt(). Keep the lock debug code from escalating an already crappy situation. 
Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 4aaa4afb1cb9..096476f1fb35 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (unlikely(c)) { \ + if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ -- cgit v1.2.3 From 30742d5c2277c325fb0e9d2d817d55a19995fe8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:43:39 +0200 Subject: Revert "lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set" This reverts commit bd8fbdee6562ee526f3c2582a3b373ef195015dd. This broke the powerpc build - more fixes are needed before we can undo this revert. --- include/linux/irqflags.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f2993512b3b5..74bde13224c9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#include - #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#include + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,20 +84,21 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. 
*/ -#define local_irq_disable() raw_local_irq_disable() -#define local_irq_enable() raw_local_irq_enable() -#define local_irq_save(flags) \ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_save(flags); \ + local_irq_save(flags); \ } while (0) -# define local_irq_restore(flags) \ +# define raw_local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_restore(flags); \ + local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -123,5 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) +#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From fb71e45338453698bd7460f7e8f171ea0304d218 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 15 Sep 2008 18:04:26 -0700 Subject: uaccess: fix parameters inversion for __copy_from_user_inatomic() The following patch changes to use __copy_from_user_inatomic(), but the passing parameters incorrect: x86: some lock annotations for user copy paths, v3 This fixes the netfilter crash reported by Steven Noonan. 
Reported-by: Steven Noonan Signed-off-by: Hiroshi Shimamoto Tested-by: Steven Noonan Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2062293e57e6..6b58367d145e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ + ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 38d47c1b7075bd7ec3881141bb3629da58f88dab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Sep 2008 19:32:20 +0200 Subject: futex: rely on get_user_pages() for shared futexes On the way of getting rid of the mmap_sem requirement for shared futexes, start by relying on get_user_pages(). Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/futex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec3..8f627b9ae2b1 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -164,6 +164,8 @@ union futex_key { } both; }; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } + #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); -- cgit v1.2.3 From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 23:17:09 +0200 Subject: lockstat: contend with points We currently only provide points that have to wait on contention, also lists the points we have to wait for. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0aa657aa8a1e..fc9f8e88123b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -355,7 +360,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -363,13 +368,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) -- cgit v1.2.3 From a53ccab3ccac9e8676a683df9822a2daec83ef54 Mon Sep 17 00:00:00 2001 From: Matthew Ranostay Date: Sat, 25 Oct 2008 01:05:04 
-0400 Subject: ALSA: jack: lineout support to jack abstraction layer This patch introduces support for reporting SW_LINEOUT_INSERT detection events via the jack abstraction layer. Also adds a SND_JACK_LINEOUT define to the input system header. Signed-off-by: Matthew Ranostay Cc: Dmitry Torokhov Acked-by: Mark Brown Signed-off-by: Takashi Iwai --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index a5802c9c81a4..7323d2ff5151 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -644,6 +644,7 @@ struct input_absinfo { #define SW_RADIO SW_RFKILL_ALL /* deprecated */ #define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ #define SW_DOCK 0x05 /* set = plugged into dock */ +#define SW_LINEOUT_INSERT 0x06 /* set = inserted */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) -- cgit v1.2.3 From 505e371da195fad20cb8aaf45407a2849774d6d0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 15 Oct 2008 14:56:42 +0800 Subject: markers: remove exported symbol marker_probe_cb_noarg() marker_probe_cb_noarg() should not be seen by outer code. This patch removes it. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c7fbb1..4cf45472d9f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...); /* * Connect a probe to a marker. 
-- cgit v1.2.3 From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 19:26:08 -0400 Subject: ftrace: ftrace dump on oops control Impact: add (default-off) dump-trace-on-oops flag Currently, ftrace is set up to dump its contents to the console if the kernel panics or oops. This can be annoying if you have trace data in the buffers and you experience an oops, but the trace data is old or static. Usually when you want ftrace to dump its contents is when you are debugging your system and you have set up ftrace to trace the events leading to an oops. This patch adds a control variable called "ftrace_dump_on_oops" that will enable the ftrace dump to console on oops. This variable is default off but a developer can enable it either through the kernel command line by adding "ftrace_dump_on_oops" or at run time by setting (or disabling) /proc/sys/kernel/ftrace_dump_on_oops. v2: Replaced /** with /* as Randy explained that kernel-doc does not yet handle variables. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be19..9623b7b9e5a5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -165,6 +165,8 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING +extern int ftrace_dump_on_oops; + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -- cgit v1.2.3 From b67b4b117746aef686e527c3205792db0f2c9e16 Mon Sep 17 00:00:00 2001 From: Dominic Curran Date: Mon, 27 Oct 2008 22:30:53 -0400 Subject: Input: gpio-keys - add flag to allow auto repeat This patch adds a flag to gpio-key driver to turn on the input subsystems auto repeat feature if needed. 
Signed-off-by: Dominic Curran Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index ec6ecd74781d..1289fa7623ca 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -15,6 +15,7 @@ struct gpio_keys_button { struct gpio_keys_platform_data { struct gpio_keys_button *buttons; int nbuttons; + unsigned int rep:1; /* enable input subsystem auto repeat */ }; #endif -- cgit v1.2.3 From cae1c11414912bf77a62aebd65ced321f0b9da51 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 27 Oct 2008 15:22:46 +0000 Subject: uwb: reference count reservations Reference counting the struct uwb_rsv's is safer and easier to get right than transferring ownership of the structures from the PAL to the reservation manager. This fixes an oops in the debug PAL after a reservation timed out. Signed-off-by: David Vrabel --- include/linux/uwb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index f9ccbd9a2ced..010ee708304d 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -201,6 +201,7 @@ struct uwb_rsv { struct uwb_rc *rc; struct list_head rc_node; struct list_head pal_node; + struct kref kref; struct uwb_dev *owner; struct uwb_rsv_target target; -- cgit v1.2.3 From 4d2bea4ca0adb4cebfbf89d34869c74081c42577 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 27 Oct 2008 15:42:31 +0000 Subject: wusb: do a proper channel stop When stopping the WUSB channel the host should send Channel Stop IEs giving the WUSB Channel Time of the last MMC. Both WHCI and HWA hosts provide a channel stop command for this. 
Signed-off-by: David Vrabel --- include/linux/usb/wusb-wa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h index a102561e7026..fb7c359bdfba 100644 --- a/include/linux/usb/wusb-wa.h +++ b/include/linux/usb/wusb-wa.h @@ -51,6 +51,7 @@ enum { WUSB_REQ_GET_TIME = 25, WUSB_REQ_SET_STREAM_IDX = 26, WUSB_REQ_SET_WUSB_MAS = 27, + WUSB_REQ_CHAN_STOP = 28, }; -- cgit v1.2.3 From 1cde7f68ced8d10a20dd2370e9d1d22ab3c1ea5c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 27 Oct 2008 16:48:09 +0000 Subject: uwb: order IEs by element ID ECMA-368 requires that IEs in a beacon must be sorted by element ID. Most hardware uses the ordering in the Set IE URC command so get the ordering right on the host. Also refactor the IE management code: - use uwb_ie_next() instead of uwb_ie_for_each(). - remove unnecessary functions. - API is now only uwb_rc_ie_add() and uwb_rc_ie_rm(). Signed-off-by: David Vrabel --- include/linux/uwb.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 010ee708304d..6d93f54b8879 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -444,7 +444,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name, struct uwb_rccb *cmd, size_t cmd_size, u8 expected_type, u16 expected_event, struct uwb_rceb **preply); -ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **); int uwb_bg_joined(struct uwb_rc *rc); size_t __uwb_addr_print(char *, size_t, const unsigned char *, int); @@ -653,22 +652,9 @@ static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe) /* Information Element handling */ -/* For representing the state of writing to a buffer when iterating */ -struct uwb_buf_ctx { - char *buf; - size_t bytes, size; -}; - -typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *, - size_t, void *); struct uwb_ie_hdr 
*uwb_ie_next(void **ptr, size_t *len); -ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data, - const void *buf, size_t size); -int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *, - size_t, void *); -int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *); -struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); - +int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size); +int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id); /* * Transmission statistics -- cgit v1.2.3 From 5e1f8c9e20a92743eefc9a82c2db835213905e26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:55 -0400 Subject: ext3: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" Cc: akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org --- include/linux/ext3_fs.h | 28 +++++++++++++++++++++++++++- include/linux/ext3_fs_sb.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..9004794a35fe 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -353,6 +353,13 @@ struct ext3_inode { #define EXT3_ERROR_FS 0x0002 /* Errors detected */ #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ +/* + * Misc. 
filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + /* * Mount flags */ @@ -489,7 +496,23 @@ struct ext3_super_block { __u16 s_reserved_word_pad; __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ - __u32 s_reserved[190]; /* Padding to the end of the block */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u8 s_log_groups_per_flex; /* FLEX_BG group size */ + __u8 s_reserved_char_pad2; + __le16 s_reserved_pad; + __u32 s_reserved[162]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -694,6 +717,9 @@ static inline __le16 ext3_rec_len_to_disk(unsigned len) #define DX_HASH_LEGACY 0 #define DX_HASH_HALF_MD4 1 #define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_HALF_MD4_UNSIGNED 4 +#define DX_HASH_TEA_UNSIGNED 5 #ifdef __KERNEL__ diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..a4e9216b3a6d 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -57,6 +57,7 @@ struct ext3_sb_info { u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; + int 
s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; -- cgit v1.2.3 From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2725f4e5a9bf..487e34507b41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -269,8 +269,9 @@ struct sk_buff { struct dst_entry *dst; struct rtable *rtable; }; +#ifdef CONFIG_XFRM struct sec_path *sp; - +#endif /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu to->queue_mapping = from->queue_mapping; } +#ifdef CONFIG_XFRM +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return skb->sp; +} +#else +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return NULL; +} +#endif + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; -- cgit v1.2.3 From 3a2dfbe8acb154905fdc2fd03ec56df42e6c4cc4 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 28 Oct 2008 16:01:07 -0700 Subject: xfrm: Notify changes in UDP encapsulation via netlink Add new_mapping() implementation to the netlink xfrm_mgr to notify address/port changes detected in UDP encapsulated ESP packets. Signed-off-by: Martin Willi Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4bc1e6b86cb2..52f3abd453a1 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -199,6 +199,9 @@ enum { #define XFRM_MSG_NEWSPDINFO XFRM_MSG_NEWSPDINFO XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -438,6 +441,15 @@ struct xfrm_user_migrate { __u16 new_family; }; +struct xfrm_user_mapping { + struct xfrm_usersa_id id; + __u32 reqid; + xfrm_address_t old_saddr; + xfrm_address_t new_saddr; + __be16 old_sport; + __be16 new_sport; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 @@ -464,6 +476,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_REPORT XFRMNLGRP_REPORT XFRMNLGRP_MIGRATE, #define XFRMNLGRP_MIGRATE XFRMNLGRP_MIGRATE + XFRMNLGRP_MAPPING, +#define XFRMNLGRP_MAPPING XFRMNLGRP_MAPPING __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) -- cgit v1.2.3 From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:09:23 -0700 Subject: net: replace uses of NIP6_FMT with %p6 Signed-off-by: Harvey Harrison Signed-off-by: David S. 
Miller --- include/linux/sunrpc/svc_xprt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 6fd7b016517f..42e01c93c7ea 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,8 +145,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", - NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + snprintf(buf, len, "%p6, port=%u", + &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; -- cgit v1.2.3 From b189db5d299c6824780af5590564ff608adb3dea Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 22:38:52 -0700 Subject: net: remove NIP6(), NIP6_FMT, NIP6_SEQFMT and final users Open code NIP6_FMT in the one call inside sscanf and one user of NIP6() that could use %p6 in the netfilter code. Signed-off-by: Harvey Harrison Signed-off-by: David S. 
Miller --- include/linux/kernel.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 396a350b87a6..77777c460099 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,18 +357,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) ((unsigned char *)&addr)[3] #define NIPQUAD_FMT "%u.%u.%u.%u" -#define NIP6(addr) \ - ntohs((addr).s6_addr16[0]), \ - ntohs((addr).s6_addr16[1]), \ - ntohs((addr).s6_addr16[2]), \ - ntohs((addr).s6_addr16[3]), \ - ntohs((addr).s6_addr16[4]), \ - ntohs((addr).s6_addr16[5]), \ - ntohs((addr).s6_addr16[6]), \ - ntohs((addr).s6_addr16[7]) -#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" -#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x" - #if defined(__LITTLE_ENDIAN) #define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ -- cgit v1.2.3 From a20c7ab570ffdce1d6f67c7acf8c1c502a3b3839 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 18:43:48 +0400 Subject: [MTD] sharpsl-nand: use platform_data for model-specific values Add platform_data which holds all model-specific values, like badblocks pattern, oobinfo, partitions. Signed-off-by: Dmitry Baryshkov --- include/linux/mtd/sharpsl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/mtd/sharpsl.h (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h new file mode 100644 index 000000000000..25f4d2a845c1 --- /dev/null +++ b/include/linux/mtd/sharpsl.h @@ -0,0 +1,20 @@ +/* + * SharpSL NAND support + * + * Copyright (C) 2008 Dmitry Baryshkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + +struct sharpsl_nand_platform_data { + struct nand_bbt_descr *badblock_pattern; + struct nand_ecclayout *ecc_layout; + struct mtd_partition *partitions; + unsigned int nr_partitions; +}; -- cgit v1.2.3 From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:58 -0700 Subject: udp: introduce sk_for_each_rcu_safenext() Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.) "If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the "next" field (with proper barriers) before checking the hash." This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..3ba2998b22ba 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) +/** + * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. 
+ * @next: the &struct hlist_node to use as a next cursor + * + * Special version of hlist_for_each_entry_rcu that make sure + * each next pointer is fetched before each iteration. + */ +#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(next)) + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:52:50 -0700 Subject: net: replace %p6 with %pI6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e01c93c7ea..51cb75ea42d5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,7 +145,7 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%p6, port=%u", + snprintf(buf, len, "%pI6, port=%u", &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; -- cgit v1.2.3 From 2cb1599f9b2ecdd7a9e59feeee647eb258966839 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:32:23 +1100 Subject: Inode: Allow external initialisers To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need to initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. Factor and export the struct inode initialisation code from alloc_inode() to inode_init_always() as a counterpart to inode_init_once(). i.e.
we have to call this init function for each inode instantiation (always), as opposed to inode_init_once() which is only called on slab object instantiation (once). Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430c..04abead4b021 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1881,6 +1881,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); +extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); -- cgit v1.2.3 From 8290c35f87304a6b73d4fd17b03580b4f7425de8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:35:24 +1100 Subject: Inode: Allow external list initialisation To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need to initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. After inode allocation and initialisation, we need to add the inode to the superblock list, the in-use list, hash it and do some accounting. This all needs to be done with the inode_lock held and there are already several places in fs/inode.c that do this list manipulation. Factor out the common code, add a locking wrapper and export the function so it can be called from XFS.
Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 04abead4b021..1deedf235d55 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1883,6 +1883,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); +extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); -- cgit v1.2.3 From d98d38f2014ab79f28c126ff175d034891f7aefc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 29 Oct 2008 14:24:09 -0700 Subject: mutex: improve header comment to be actually informative about the API Impact: improve documentation It's nice to say that mutex_trylock follows the spin_trylock convention. It's a lot nicer if the comment also says which that is... make it so. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/linux/mutex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bc6da10ceee0..7a0e5c4f8072 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! + * + * Returns 1 if the mutex has been acquired successfully, and 0 on contention. 
*/ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); -- cgit v1.2.3 From 17666f02b118099028522dfc3df00a235700e216 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 30 Oct 2008 16:08:32 -0400 Subject: ftrace: nmi safe code modification Impact: fix crashes that can occur in NMI handlers, if their code is modified Modifying code is something that needs special care. On SMP boxes, if code that is being modified is also being executed on another CPU, that CPU will have undefined results. The dynamic ftrace uses kstop_machine to make the system act like a uniprocessor system. But this does not address NMIs, that can still run on other CPUs. One approach to handle this is to make all code that is used by NMIs not be traced. But NMIs can call notifiers that spread throughout the kernel and this will be very hard to maintain, and the chance of missing a function is very high. The approach that this patch takes is to have the NMIs modify the code if the modification is taking place. The way this works is that just writing to code executing on another CPU is not harmful if what is written is the same as what exists. Two buffers are used: an IP buffer and a "code" buffer. The steps that the patcher takes are: 1) Put the instruction pointer into the IP buffer and the new code into the "code" buffer. 2) Set a flag that says we are modifying code 3) Wait for any running NMIs to finish. 4) Write the code 5) clear the flag. 6) Wait for any running NMIs to finish. If an NMI is executed, it will also write the pending code. Multiple writes are OK, because what is being written is the same. Then the patcher must wait for all running NMIs to finish before going to the next line that must be patched. This is basically the RCU approach to code modification. Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for his guidance on what is safe and what is not.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a0..0087cb43becf 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -161,7 +162,17 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + lockdep_off(); \ + __irq_enter(); \ + } while (0) +#define nmi_exit() \ + do { \ + __irq_exit(); \ + lockdep_on(); \ + ftrace_nmi_exit(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From 3685f25de1b0447fff381c420de1e25bd57c9efb Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:56:49 -0700 Subject: misc: replace NIPQUAD() Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. 
Miller --- include/linux/sunrpc/svc_xprt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 51cb75ea42d5..0127daca4354 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -139,8 +139,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, { switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%u.%u.%u.%u, port=%u", - NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), + snprintf(buf, len, "%pI4, port=%u", + &((struct sockaddr_in *)addr)->sin_addr, ntohs(((struct sockaddr_in *) addr)->sin_port)); break; -- cgit v1.2.3 From 92be3d6bdf2cb34972ab50e12ad4da1076e690da Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Oct 2008 09:48:08 +0800 Subject: kexec/i386: allocate page table pages dynamically Impact: save .text size when kexec is built in but not loaded This patch adds an architecture specific struct kimage_arch into struct kimage. The pointers to page table pages used by kexec are added to struct kimage_arch. The page tables pages are dynamically allocated in machine_kexec_prepare instead of statically from BSS segment. This will save up to 20k memory when kexec image is not loaded. 
Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar --- include/linux/kexec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 17f76fc05173..adc34f2c6eff 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -100,6 +100,10 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; + +#ifdef ARCH_HAS_KIMAGE_ARCH + struct kimage_arch arch; +#endif }; -- cgit v1.2.3 From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 00:03:22 -0400 Subject: ftrace: nmi safe code clean ups Impact: cleanup This patch cleans up the NMI safe code for dynamic ftrace as suggested by Andrew Morton. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..22240dfe912e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment -- cgit v1.2.3 From ae9eba0e2744f1aa15cdc97cd39277a84723ae23 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 20:06:16 +0100 Subject: uwb: struct device - replace bus_id with dev_name(), dev_set_name() Cc: David Vrabel Acked-by: Greg Kroah-Hartman Signed-Off-By: Kay Sievers Signed-off-by: David Vrabel --- include/linux/uwb/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h index a86a73fe303f..67a240527145 100644 --- a/include/linux/uwb/debug.h +++ b/include/linux/uwb/debug.h 
@@ -60,7 +60,7 @@ do { \ snprintf(__head, sizeof(__head), \ "%s %s: ", \ dev_driver_string(__dev), \ - __dev->bus_id); \ + dev_name(__dev)); \ } \ printk(KERN_ERR "%s%s" _tag ": " f, __head, \ __func__, ## a); \ -- cgit v1.2.3 From d9fe60dea7779d412b34679f1177c5ca1940ea8d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:13:49 +0200 Subject: 802.11: clean up/fix HT support This patch cleans up a number of things: * the unusable definition of the HT capabilities/HT information information elements * variable names that are hard to understand * mac80211: move ieee80211_handle_ht to ht.c and remove the unused enable_ht parameter * mac80211: fix bug with MCS rate 32 in ieee80211_handle_ht * mac80211: fix bug with casting the result of ieee80211_bss_get_ie to an information element _contents_ rather than the whole element, add size checking (another out-of-bounds access bug fixed!) * mac80211: remove some unused return values in favour of BUG_ON checking * a few minor other things Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 133 +++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 14126bc36641..64a4abce6d91 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -685,28 +685,88 @@ struct ieee80211_bar { #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA 0x0004 + +#define IEEE80211_HT_MCS_MASK_LEN 10 + +/** + * struct ieee80211_mcs_info - MCS information + * @rx_mask: RX mask + * @rx_highest: highest supported RX rate + * @tx_params: TX parameters + */ +struct ieee80211_mcs_info { + u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; + __le16 rx_highest; + u8 tx_params; + u8 reserved[3]; +} __attribute__((packed)); + +/* 802.11n HT capability MSC set */ +#define IEEE80211_HT_MCS_RX_HIGHEST_MASK 0x3ff +#define IEEE80211_HT_MCS_TX_DEFINED 0x01 +#define IEEE80211_HT_MCS_TX_RX_DIFF 0x02 +/* value 0 == 1 stream etc */ +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK 0x0C +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT 2 +#define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 +#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 + +/* + * 802.11n D5.0 20.3.5 / 20.6 says: + * - indices 0 to 7 and 32 are single spatial stream + * - 8 to 31 are multiple spatial streams using equal modulation + * [8..15 for two streams, 16..23 for three and 24..31 for four] + * - remainder are multiple spatial streams using unequal modulation + */ +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33 +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \ + (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) + /** * struct ieee80211_ht_cap - HT capabilities * - * This structure refers to "HT capabilities element" as - * described in 802.11n draft section 7.3.2.52 + * This structure is the "HT capabilities element" as + * described in 802.11n D5.0 7.3.2.57 */ struct ieee80211_ht_cap { __le16 
cap_info; u8 ampdu_params_info; - u8 supp_mcs_set[16]; + + /* 16 bytes MCS information */ + struct ieee80211_mcs_info mcs; + __le16 extended_ht_cap_info; __le32 tx_BF_cap_info; u8 antenna_selection_info; } __attribute__ ((packed)); +/* 802.11n HT capabilities masks (for cap_info) */ +#define IEEE80211_HT_CAP_LDPC_CODING 0x0001 +#define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 +#define IEEE80211_HT_CAP_SM_PS 0x000C +#define IEEE80211_HT_CAP_GRN_FLD 0x0010 +#define IEEE80211_HT_CAP_SGI_20 0x0020 +#define IEEE80211_HT_CAP_SGI_40 0x0040 +#define IEEE80211_HT_CAP_TX_STBC 0x0080 +#define IEEE80211_HT_CAP_RX_STBC 0x0300 +#define IEEE80211_HT_CAP_DELAY_BA 0x0400 +#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 +#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 +#define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 + +/* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ +#define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 +#define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C + /** - * struct ieee80211_ht_cap - HT additional information + * struct ieee80211_ht_info - HT information * - * This structure refers to "HT information element" as - * described in 802.11n draft section 7.3.2.53 + * This structure is the "HT information element" as + * described in 802.11n D5.0 7.3.2.58 */ -struct ieee80211_ht_addt_info { +struct ieee80211_ht_info { u8 control_chan; u8 ht_param; __le16 operation_mode; @@ -714,36 +774,33 @@ struct ieee80211_ht_addt_info { u8 basic_set[16]; } __attribute__ ((packed)); -/* 802.11n HT capabilities masks */ -#define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_SM_PS 0x000C -#define IEEE80211_HT_CAP_GRN_FLD 0x0010 -#define IEEE80211_HT_CAP_SGI_20 0x0020 -#define IEEE80211_HT_CAP_SGI_40 0x0040 -#define IEEE80211_HT_CAP_DELAY_BA 0x0400 -#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 -#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -/* 802.11n HT capability AMPDU settings */ -#define 
IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 -#define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -/* 802.11n HT capability MSC set */ -#define IEEE80211_SUPP_MCS_SET_UEQM 4 -#define IEEE80211_HT_CAP_MAX_STREAMS 4 -#define IEEE80211_SUPP_MCS_SET_LEN 10 -/* maximum streams the spec allows */ -#define IEEE80211_HT_CAP_MCS_TX_DEFINED 0x01 -#define IEEE80211_HT_CAP_MCS_TX_RX_DIFF 0x02 -#define IEEE80211_HT_CAP_MCS_TX_STREAMS 0x0C -#define IEEE80211_HT_CAP_MCS_TX_UEQM 0x10 -/* 802.11n HT IE masks */ -#define IEEE80211_HT_IE_CHA_SEC_OFFSET 0x03 -#define IEEE80211_HT_IE_CHA_SEC_NONE 0x00 -#define IEEE80211_HT_IE_CHA_SEC_ABOVE 0x01 -#define IEEE80211_HT_IE_CHA_SEC_BELOW 0x03 -#define IEEE80211_HT_IE_CHA_WIDTH 0x04 -#define IEEE80211_HT_IE_HT_PROTECTION 0x0003 -#define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 -#define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* for ht_param */ +#define IEEE80211_HT_PARAM_CHA_SEC_OFFSET 0x03 +#define IEEE80211_HT_PARAM_CHA_SEC_NONE 0x00 +#define IEEE80211_HT_PARAM_CHA_SEC_ABOVE 0x01 +#define IEEE80211_HT_PARAM_CHA_SEC_BELOW 0x03 +#define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY 0x04 +#define IEEE80211_HT_PARAM_RIFS_MODE 0x08 +#define IEEE80211_HT_PARAM_SPSMP_SUPPORT 0x10 +#define IEEE80211_HT_PARAM_SERV_INTERVAL_GRAN 0xE0 + +/* for operation_mode */ +#define IEEE80211_HT_OP_MODE_PROTECTION 0x0003 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONE 0 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER 1 +#define IEEE80211_HT_OP_MODE_PROTECTION_20MHZ 2 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 +#define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 +#define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 + +/* for stbc_param */ +#define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 +#define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT 0x0080 +#define IEEE80211_HT_STBC_PARAM_STBC_BEACON 0x0100 +#define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT 0x0200 +#define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE 0x0400 +#define IEEE80211_HT_STBC_PARAM_PCO_PHASE 0x0800 + /* block-ack parameters */ 
#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 @@ -949,7 +1006,7 @@ enum ieee80211_eid { WLAN_EID_EXT_SUPP_RATES = 50, /* 802.11n */ WLAN_EID_HT_CAPABILITY = 45, - WLAN_EID_HT_EXTRA_INFO = 61, + WLAN_EID_HT_INFORMATION = 61, /* 802.11i */ WLAN_EID_RSN = 48, WLAN_EID_WPA = 221, -- cgit v1.2.3 From d51626df5747efaa8d2c00678f64cb503845effe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:20:13 +0200 Subject: nl80211: export HT capabilities This exports the local HT capabilities in nl80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9bad65400fba..41720d47d618 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -452,17 +452,29 @@ enum nl80211_mpath_info { * an array of nested frequency attributes * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, * an array of nested bitrate attributes + * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as + * defined in 802.11n + * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n + * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, NL80211_BAND_ATTR_FREQS, NL80211_BAND_ATTR_RATES, + NL80211_BAND_ATTR_HT_MCS_SET, + NL80211_BAND_ATTR_HT_CAPA, + NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 }; +#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA + /** * enum nl80211_frequency_attr - frequency attributes * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz -- cgit v1.2.3 From 93da9cc17c5ae8a751886fd4732db89ad5e9bdb9 Mon Sep 17 00:00:00 2001 From: "colin@cozybit.com" Date: Tue, 21 
Oct 2008 12:03:48 -0700 Subject: Add nl80211 commands to get and set o11s mesh networking parameters The two new commands are NL80211_CMD_GET_MESH_PARAMS and NL80211_CMD_SET_MESH_PARAMS. There is a new attribute enum, NL80211_ATTR_MESH_PARAMS, which enumerates the various mesh configuration parameters. Moved struct mesh_config from mac80211/ieee80211_i.h to net/cfg80211.h. nl80211_get_mesh_params and nl80211_set_mesh_params unpack the netlink messages and ask the driver to get or set the configuration. This is done via two new function stubs, get_mesh_params and set_mesh_params, in struct cfg80211_ops. Signed-off-by: Colin McCabe Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 41720d47d618..e4cc7869b22f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -106,6 +106,12 @@ * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will * store this as a valid request and then query userspace for it. 
* + * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -148,6 +154,9 @@ enum nl80211_commands { NL80211_CMD_SET_REG, NL80211_CMD_REQ_SET_REG, + NL80211_CMD_GET_MESH_PARAMS, + NL80211_CMD_SET_MESH_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -296,6 +305,8 @@ enum nl80211_attrs { NL80211_ATTR_REG_ALPHA2, NL80211_ATTR_REG_RULES, + NL80211_ATTR_MESH_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -606,4 +617,79 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_meshconf_params - mesh configuration parameters + * + * Mesh configuration parameters + * + * @__NL80211_MESHCONF_INVALID: internal use + * + * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in + * millisecond units, used by the Peer Link Open message + * + * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the inital confirm timeout, in + * millisecond units, used by the peer link management to close a peer link + * + * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in + * millisecond units + * + * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed + * on this mesh interface + * + * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link + * open retries that can be sent to establish a new peer link instance in a + * mesh + * + * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh + * point. + * + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically + * open peer links when we detect compatible mesh peers. 
+ * + * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames + * containing a PREQ that an MP can send to a particular destination (path + * target) + * + * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths + * (in milliseconds) + * + * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait + * until giving up on a path discovery (in milliseconds) + * + * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh + * points receiving a PREQ shall consider the forwarding information from the + * root to be valid. (TU = time unit) + * + * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element + * + * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) + * that it takes for an HWMP information element to propagate across the mesh + * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * + * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use + */ +enum nl80211_meshconf_params { + __NL80211_MESHCONF_INVALID, + NL80211_MESHCONF_RETRY_TIMEOUT, + NL80211_MESHCONF_CONFIRM_TIMEOUT, + NL80211_MESHCONF_HOLDING_TIMEOUT, + NL80211_MESHCONF_MAX_PEER_LINKS, + NL80211_MESHCONF_MAX_RETRIES, + NL80211_MESHCONF_TTL, + NL80211_MESHCONF_AUTO_OPEN_PLINKS, + NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + NL80211_MESHCONF_PATH_REFRESH_TIME, + NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + + /* keep last */ + __NL80211_MESHCONF_ATTR_AFTER_LAST, + NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 9387b7caf3049168fc97a8a9111af8fe2143af18 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Tue, 30 Sep 2008 20:59:05 -0400 Subject: wireless: use individual buffers for printing ssid values Also change escape_ssid to print_ssid to match print_mac semantics. Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 64a4abce6d91..b0726e2079b5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -12,8 +12,8 @@ * published by the Free Software Foundation. */ -#ifndef IEEE80211_H -#define IEEE80211_H +#ifndef LINUX_IEEE80211_H +#define LINUX_IEEE80211_H #include #include @@ -1114,4 +1114,4 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) return hdr->addr1; } -#endif /* IEEE80211_H */ +#endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 72118015271e6d3852cb9f647efe0987d131adaa Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 21:43:03 -0400 Subject: wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts There is quite a lot of overlap in definitions between these headers... Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b0726e2079b5..aad99195a4cc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -826,8 +826,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_FAST_BSS_TRANSITION 2 -#define WLAN_AUTH_LEAP 128 +#define WLAN_AUTH_LEAP 2 #define WLAN_AUTH_CHALLENGE_LEN 128 -- cgit v1.2.3 From 8b30b1fe368ab03049435884c11c5c50e4c4ef0b Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 24 Oct 2008 09:55:27 +0530 Subject: mac80211: Re-enable aggregation Wireless HW without any dedicated queues for aggregation do not need the ampdu_queues mechanism present right now in mac80211. Since mac80211 is still incomplete wrt TX MQ changes, do not allow aggregation sessions for drivers that set ampdu_queues. This is only an interim hack until Intel fixes the requeue issue. Signed-off-by: Sujith Signed-off-by: Luis Rodriguez Signed-off-by: John W. 
Linville --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 487e34507b41..a01b6f84e3bc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,6 +250,9 @@ typedef unsigned char *sk_buff_data_t; * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) * @do_not_encrypt: set to prevent encryption of this frame + * @requeue: set to indicate that the wireless core should attempt + * a software retry on this frame if we failed to + * receive an ACK for it * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -326,6 +329,7 @@ struct sk_buff { #endif #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) __u8 do_not_encrypt:1; + __u8 requeue:1; #endif /* 0/13/14 bit hole */ -- cgit v1.2.3 From 127cafbb276266b1b8da967bfe25a062ab1d42ab Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Oct 2008 10:51:53 +0800 Subject: tracepoint: introduce *_noupdate APIs. Impact: add new tracepoint APIs to allow the batched registration of probes new APIs separate tracepoint_probe_register(), tracepoint_probe_unregister() into 2 steps. The first step of them is just update tracepoint_entry, not connect or disconnect. this patch introduces tracepoint_probe_update_all() for update all. these APIs are very useful for registering lots of probes but just updating once. Another very important thing is that *_noupdate APIs do not require module_mutex. 
Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c5bb39c7a770..63064e9403f2 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; -- cgit v1.2.3 From 7e5e26a3d8ac4bcadb380073dc9604c07a9a6198 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 09:36:38 -0400 Subject: ftrace: fix hardirq header for non ftrace archs Impact: build fix for non-ftrace architectures Not all archs implement ftrace, and therefore do not have an asm/ftrace.h. This patch corrects the problem. The ftrace_nmi_enter/exit now must be defined for all archs that implement dynamic ftrace. Currently, only x86 does. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++- include/linux/hardirq.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037c..0ad1b48aea69 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,7 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE - enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -105,6 +104,8 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); #else # define skip_trace(ip) ({ 0; }) @@ -113,6 +114,8 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0087cb43becf..ffc16ab5a878 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,8 +4,8 @@ #include #include #include +#include #include -#include #include /* -- cgit v1.2.3 From 29cbda77a67cf263d636feea65d3bbc9c7de2e24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Nov 2008 09:16:39 -0800 Subject: rcu: increase RCU stall-check timeouts Impact: increase timeout of debug check feature Increase RCU stall period timeouts to reduce the likelihood of false positives. Signed-off-by: Paul E.
McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 5f89b62e6983..301dda829e37 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -41,7 +41,7 @@ #include #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -- cgit v1.2.3 From 6cf3f41e6c08bca6641a695449791c38a25f35ff Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Mon, 3 Nov 2008 18:16:50 -0800 Subject: bonding, net: Move last_rx update into bonding recv logic The only user of the net_device->last_rx field is bonding. This patch adds a conditional update of last_rx to the bonding special logic in skb_bond_should_drop, causing last_rx to only be updated when the ARP monitor is running. This frees network device drivers from the necessity of updating last_rx, which can have cache line thrash issues. Signed-off-by: Jay Vosburgh Signed-off-by: David S. 
Miller --- include/linux/if.h | 1 + include/linux/netdevice.h | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 65246846c844..2a6e29620a96 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -65,6 +65,7 @@ #define IFF_BONDING 0x20 /* bonding master or slave */ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ +#define IFF_MASTER_ARPMON 0x100 /* bonding master, ARP mon in use */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca8..f1b0dbe58464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1742,22 +1742,26 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net_device *master = dev->master; - if (master && - (dev->priv_flags & IFF_SLAVE_INACTIVE)) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __constant_htons(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) + if (master) { + if (master->priv_flags & IFF_MASTER_ARPMON) + dev->last_rx = jiffies; + + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __constant_htons(ETH_P_ARP)) return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - return 0; - return 1; + if (master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return 0; + } + if (master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + return 0; + + return 1; + } } return 0; } -- cgit v1.2.3 From 511061e2dd1b84bb21bb97c9216a19606c29ac02 
Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:22:55 +0100 Subject: netfilter: netns ebtables: part 1 * propagate netns from userspace, register table in passed netns * temporarily register every ebt_table in init_net P. S.: one needs to add ".netns_ok = 1" to igmp_protocol to test with ebtables(8) in netns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d45e29cd1cfb..624e7883068c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct ebt_table *table); +extern int ebt_register_table(struct net *net, struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.2.3 From 6beceee5aa2cb94c4ae9f0784c7d3135d343f5b5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:27:15 +0100 Subject: netfilter: netns ebtables: part 2 * return ebt_table from ebt_register_table(), module code will save it into per-netns data for unregistration * duplicate ebt_table at the very beginning of registration -- it's added into list, so one ebt_table wouldn't end up in many lists (and each netns has different one) * introduce underscored tables in individual modules, this is temporary to not break bisection.
Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 624e7883068c..e40ddb94b1af 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,8 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct net *net, struct ebt_table *table); +extern struct ebt_table *ebt_register_table(struct net *net, + struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.2.3 From c5995bd2819dc577d0b32b26be0836d16c977e24 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Tue, 4 Nov 2008 14:06:31 +0000 Subject: uwb: infrastructure for handling Relinquish Request IEs The structures and event handler needed to handle Relinquish Request IEs received from neighbors. Nothing is done with these IEs yet.
Signed-off-by: Stefano Panella Signed-off-by: David Vrabel --- include/linux/uwb/spec.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h index 198c15f8e251..a30436ea53aa 100644 --- a/include/linux/uwb/spec.h +++ b/include/linux/uwb/spec.h @@ -200,6 +200,12 @@ enum uwb_drp_reason { UWB_DRP_REASON_MODIFIED, }; +/** Relinquish Request Reason Codes ([ECMA-368] table 113) */ +enum uwb_relinquish_req_reason { + UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0, + UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION, +}; + /** * DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9]) */ @@ -252,6 +258,7 @@ enum uwb_ie { UWB_APP_SPEC_PROBE_IE = 15, UWB_IDENTIFICATION_IE = 19, UWB_MASTER_KEY_ID_IE = 20, + UWB_RELINQUISH_REQUEST_IE = 21, UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */ UWB_APP_SPEC_IE = 255, }; @@ -365,6 +372,27 @@ struct uwb_ie_drp_avail { DECLARE_BITMAP(bmp, UWB_NUM_MAS); } __attribute__((packed)); +/* Relinqish Request IE ([ECMA-368] section 16.8.19). */ +struct uwb_relinquish_request_ie { + struct uwb_ie_hdr hdr; + __le16 relinquish_req_control; + struct uwb_dev_addr dev_addr; + struct uwb_drp_alloc allocs[]; +} __attribute__((packed)); + +static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie) +{ + return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf; +} + +static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie, + int reason_code) +{ + u16 ctrl = le16_to_cpu(ie->relinquish_req_control); + ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0); + ie->relinquish_req_control = cpu_to_le16(ctrl); +} + /** * The Vendor ID is set to an OUI that indicates the vendor of the device. 
* ECMA-368 [16.8.10] -- cgit v1.2.3 From 6d5a681dfb583b2f1eefe7cd5505419ca2d4d6c8 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Tue, 4 Nov 2008 14:24:57 +0000 Subject: uwb: add commands to add/remove IEs to the debug interface Add the commands UWB_DBG_CMD_IE_ADD and UWB_DBG_CMD_IE_RM to the debug interface and make them call uwb_rc_ie_add() and uwb_rc_ie_rm(). Signed-off-by: Stefano Panella Signed-off-by: David Vrabel --- include/linux/uwb/debug-cmd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h index 1141f41bab5c..6a16566f0221 100644 --- a/include/linux/uwb/debug-cmd.h +++ b/include/linux/uwb/debug-cmd.h @@ -32,6 +32,8 @@ enum uwb_dbg_cmd_type { UWB_DBG_CMD_RSV_ESTABLISH = 1, UWB_DBG_CMD_RSV_TERMINATE = 2, + UWB_DBG_CMD_IE_ADD = 3, + UWB_DBG_CMD_IE_RM = 4, }; struct uwb_dbg_cmd_rsv_establish { @@ -46,11 +48,18 @@ struct uwb_dbg_cmd_rsv_terminate { int index; }; +struct uwb_dbg_cmd_ie { + __u8 data[128]; + int len; +}; + struct uwb_dbg_cmd { __u32 type; union { struct uwb_dbg_cmd_rsv_establish rsv_establish; struct uwb_dbg_cmd_rsv_terminate rsv_terminate; + struct uwb_dbg_cmd_ie ie_add; + struct uwb_dbg_cmd_ie ie_rm; }; }; -- cgit v1.2.3 From fec1a5932f16c0eb1b3f5ca2e18d81d860924088 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Tue, 4 Nov 2008 15:39:08 +0000 Subject: uwb: per-radio controller event thread and beacon cache Use an event thread per-radio controller so processing events from one radio controller doesn't delay another. A radio controller shouldn't have information on devices seen by a different radio controller (they may be on different channels) so make the beacon cache per-radio controller. 
Signed-off-by: Stefano Panella Signed-off-by: David Vrabel --- include/linux/uwb.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 6d93f54b8879..881f0c5b6d28 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,22 @@ struct uwb_notifs_chain { struct mutex mutex; }; +/* Beacon cache list */ +struct uwb_beca { + struct list_head list; + size_t entries; + struct mutex mutex; +}; + +/* Event handling thread. */ +struct uwbd { + int pid; + struct task_struct *task; + wait_queue_head_t wq; + struct list_head event_list; + spinlock_t event_list_lock; +}; + /** * struct uwb_mas_bm - a bitmap of all MAS in a superframe * @bm: a bitmap of length #UWB_NUM_MAS @@ -342,6 +359,9 @@ struct uwb_rc { enum uwb_scan_type scan_type:3; unsigned ready:1; struct uwb_notifs_chain notifs_chain; + struct uwb_beca uwb_beca; + + struct uwbd uwbd; struct uwb_drp_avail drp_avail; struct list_head reservations; -- cgit v1.2.3 From 71566a0d161edec70361b7f90f6e54af6a6d5d05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 12:57:20 +0100 Subject: tracing/fastboot: Enable boot tracing only during initcalls Impact: modify boot tracer We used to disable the initcall tracing at a specified time (IE: end of builtin initcalls). But we don't need it anymore. It will be stopped when initcalls are finished. However we want two things: _Start this tracing only after pre-smp initcalls are finished. _Since we are planning to trace sched_switches at the same time, we want to enable them only during the initcall execution. For this purpose, this patch introduce two functions to enable/disable the sched_switch tracing during boot. 
Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037c..4642959e5bda 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -234,6 +234,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ struct boot_trace { pid_t caller; char func[KSYM_NAME_LEN]; @@ -244,13 +249,28 @@ struct boot_trace { }; #ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ extern void start_boot_trace(void); -extern void stop_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); #else static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } -static inline void stop_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } #endif -- cgit v1.2.3 From fd8cd7e1919fc1c27fe2fdccd2a1cd32f791ef0f Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 15:50:38 -0800 Subject: x86: vmware: look for DMI string in the product serial key Impact: Should permit VMware detection on older platforms where the vendor is changed. Could theoretically cause a regression if some weird serial number scheme contains the string "VMware" by pure chance. 
Seems unlikely, especially with the mixed case. In some user configured cases, VMware may choose not to put a VMware specific DMI string, but the product serial key is always there and is VMware specific. Add a interface to check the serial key, when checking for VMware in the DMI information. Signed-off-by: Alok N Kataria Signed-off-by: H. Peter Anvin --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index e5084eb5943a..2bfda178f274 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -44,6 +44,7 @@ extern const struct dmi_device * dmi_find_device(int type, const char *name, extern void dmi_scan_machine(void); extern int dmi_get_year(int field); extern int dmi_name_in_vendors(const char *str); +extern int dmi_name_in_serial(const char *str); extern int dmi_available; extern int dmi_walk(void (*decode)(const struct dmi_header *)); @@ -56,6 +57,7 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na static inline void dmi_scan_machine(void) { return; } static inline int dmi_get_year(int year) { return 0; } static inline int dmi_name_in_vendors(const char *s) { return 0; } +static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 static inline int dmi_walk(void (*decode)(const struct dmi_header *)) { return -1; } -- cgit v1.2.3 From 171bbfbeab7730031eec8025341401fabe540bd5 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 25 Nov 2008 17:42:31 -0500 Subject: jbd2: Add BH_JBDPrivateStart Add this so that file systems using JBD2 can safely allocate unused b_state bits. In this case, we add it so that Ocfs2 can define a single bit for tracking the validation state of a buffer. 
Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c7d106ef22e2..f36645745489 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -329,6 +329,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ }; BUFFER_FNS(JBD, jbd) -- cgit v1.2.3 From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Nov 2008 01:14:26 -0500 Subject: jbd2: improve jbd2 fsync batching This patch removes the static sleep time in favor of a more self optimizing approach where we measure the average amount of time it takes to commit a transaction to disk and the amount of time a transaction has been running. If somebody does a sync write or an fsync() traditionally we would sleep for 1 jiffy, which depending on the value of HZ could be a significant amount of time compared to how long it takes to commit a transaction to the underlying storage. With this patch instead of sleeping for a jiffy, we check to see if the amount of time this transaction has been running is less than the average commit time, and if it is we sleep for the delta using schedule_hrtimeout to give us a higher precision sleep time. This greatly benefits high end storage where you could end up sleeping for longer than it takes to commit the transaction and therefore sitting idle instead of allowing the transaction to be committed by keeping the sleep time to a minimum so you are sure to always be doing something.
Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f36645745489..ab8cef130c28 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -637,6 +637,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -939,8 +944,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of hte last person to run a synchronous operation + * through the journal + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.3 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. 
Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c28..a3cd647ea1bc 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -956,6 +956,14 @@ struct journal_s */ u64 j_average_commit_time; + /* + * minimum and maximum times that we should wait for + * additional filesystem operations to get batched into a + * synchronous handle in microseconds + */ + u32 j_min_batch_time; + u32 j_max_batch_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.3 From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:09:22 -0500 Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint routine jbd2_log_do_checkpoint() is one of the kernel's largest stack users. Move the array of buffer heads from the stack of jbd2_log_do_checkpoint() to the in-core journal structure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a3cd647ea1bc..004c9a8d63ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,6 +687,8 @@ jbd2_time_diff(unsigned long start, unsigned long end) return end + (MAX_JIFFY_OFFSET - start); } +#define JBD2_NR_BATCH 64 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -830,6 +832,14 @@ struct journal_s /* Semaphore for locking against concurrent checkpoints */ struct mutex j_checkpoint_mutex; + /* + * List of buffer heads used by the checkpoint routine. This + * was moved from jbd2_log_do_checkpoint() to reduce stack + * usage. Access to this array is controlled by the + * j_checkpoint_mutex.
[j_checkpoint_mutex] + */ + struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; + /* * Journal head: identifies the first unused block in the journal. * [j_state_lock] -- cgit v1.2.3 From 7d43d1a0f2cf535167ec7247f110a1f85cecac43 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:43:47 -0800 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449fbec9..3978aff197d9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 
8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -- cgit v1.2.3 From ac75773c2742d82cbcb078708df406e9017224b7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:55:49 -0800 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff197d9..484b8a1fb023 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack 
vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; -- cgit v1.2.3 From 1f29fae29709b4668979e244c09b2fa78ff1ad59 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 5 Nov 2008 16:08:52 -0600 Subject: file capabilities: add no_file_caps switch (v4) Add a no_file_caps boot option when file capabilities are compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y). This allows distributions to ship a kernel with file capabilities compiled in, without forcing users to use (and understand and trust) them. When no_file_caps is specified at boot, then when a process executes a file, any file capabilities stored with that file will not be used in the calculation of the process' new capability sets. This means that booting with the no_file_caps boot option will not be the same as booting a kernel with file capabilities compiled out - in particular a task with CAP_SETPCAP will not have any chance of passing capabilities to another task (which isn't "really" possible anyway, and which may soon be killed altogether by David Howells in any case), and it will instead be able to put new capabilities in its pI. However since fI will always be empty and pI is masked with fI, it gains the task nothing. We also support the extra prctl options, setting securebits and dropping capabilities from the per-process bounding set. The other remaining difference is that killpriv, task_setscheduler, setioprio, and setnice will continue to be hooked.
That will be noticeable in the case where a root task changed its uid while keeping some caps, and another task owned by the new uid tries to change settings for the more privileged task. Changelog: Nov 05 2008: (v4) trivial port on top of always-start-\ with-clear-caps patch Sep 23 2008: nixed file_caps_enabled when file caps are not compiled in as it isn't used. Document no_file_caps in kernel-parameters.txt. Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 9d1fe30b6f6c..5bc145bd759a 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32 VFS_CAP_U32_2 #define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +extern int file_caps_enabled; +#endif struct vfs_cap_data { __le32 magic_etc; /* Little endian */ -- cgit v1.2.3 From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:02 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantees that the loopback device is the first device registered and the last network device to go away.
Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.2.3 From fd9abb3d97c2ab883e4732ec1214fe64190236e7 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:37 +0000 Subject: SMSC LAN911x and LAN921x vendor driver Attached is a driver for SMSC's LAN911x and LAN921x families of embedded ethernet controllers. There is an existing smc911x driver in the tree; this is intended to replace it. Dustin McIntire (the author of the smc911x driver) has expressed his support for switching to this driver. This driver contains workarounds for all known hardware issues, and has been tested on all flavours of the chip on multiple architectures. 
This driver now uses phylib, so this patch also adds support for the device's internal phy Signed-off-by: Steve Glendinning Signed-off-by: Bahadir Balban Signed-off-by: Dustin Mcintire Signed-off-by: Bill Gatliff Signed-off-by: Jeff Garzik --- include/linux/smsc911x.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/smsc911x.h (limited to 'include/linux') diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h new file mode 100644 index 000000000000..47c4ffd10dbb --- /dev/null +++ b/include/linux/smsc911x.h @@ -0,0 +1,42 @@ +/*************************************************************************** + * + * Copyright (C) 2004-2008 SMSC + * Copyright (C) 2005-2008 ARM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + ***************************************************************************/ +#ifndef __LINUX_SMSC911X_H__ +#define __LINUX_SMSC911X_H__ + +#include + +/* platform_device configuration data, should be assigned to + * the platform_device's dev.platform_data */ +struct smsc911x_platform_config { + unsigned int irq_polarity; + unsigned int irq_type; + phy_interface_t phy_interface; +}; + +/* Constants for platform_device irq polarity configuration */ +#define SMSC911X_IRQ_POLARITY_ACTIVE_LOW 0 +#define SMSC911X_IRQ_POLARITY_ACTIVE_HIGH 1 + +/* Constants for platform_device irq type configuration */ +#define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 +#define SMSC911X_IRQ_TYPE_PUSH_PULL 1 + +#endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.2.3 From 60a7ecf42661f2b22168751298592da6ee210c9e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: add quick function trace stop Impact: quick start and stop of function tracer This patch adds a way to disable the function tracer quickly without the need to run kstop_machine. It adds a new variable called function_trace_stop which will stop the calls to functions from mcount when set. This is just an on/off switch and does not handle recursion like preempt_disable(). It's main purpose is to help other tracers/debuggers start and stop tracing fuctions without the need to call kstop_machine. The config option HAVE_FUNCTION_TRACE_MCOUNT_TEST is added for archs that implement the testing of the function_trace_stop in the mcount arch dependent code. Otherwise, the test is done in the C code. x86 is the only arch at the moment that supports this. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4642959e5bda..794ab907dbfe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -23,6 +23,34 @@ struct ftrace_ops { struct ftrace_ops *next; }; +extern int function_trace_stop; + +/** + * ftrace_stop - stop function tracer. + * + * A quick way to stop the function tracer. Note this an on off switch, + * it is not something that is recursive like preempt_disable. + * This does not disable the calling of mcount, it only stops the + * calling of functions from mcount. + */ +static inline void ftrace_stop(void) +{ + function_trace_stop = 1; +} + +/** + * ftrace_start - start the function tracer. + * + * This function is the inverse of ftrace_stop. This does not enable + * the function tracing if the function tracer is disabled. This only + * sets the function tracer flag to continue calling the functions + * from mcount. + */ +static inline void ftrace_start(void) +{ + function_trace_stop = 0; +} + /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -41,6 +69,8 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill(void) { } +static inline void ftrace_stop(void) { } +static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: soft tracing stop and start Impact: add way to quickly start stop tracing from the kernel This patch adds a soft stop and start to the trace. 
This simply disables function tracing via the ftrace_disabled flag, and disables the trace buffers to prevent recording. The tracing code may still be executed, but the trace will not be recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 794ab907dbfe..7a75fc6d41f4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -216,6 +216,9 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern int ftrace_dump_on_oops; +extern void tracing_start(void); +extern void tracing_stop(void); + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -246,6 +249,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } static inline int ftrace_printk(const char *fmt, ...) { -- cgit v1.2.3 From 6a60dd121c5b6c2d827e99b38c1326f2600c3891 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Nov 2008 15:55:21 -0500 Subject: ftrace: split out hardirq ftrace code into own header Impact: moving of function prototypes into own header file ftrace.h is too big of a file for hardirq.h, and some archs will fail to build because of the include dependencies not being met. This patch pulls out the required prototypes for hardirq.h into a smaller and safer ftrace_irq.h file. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ----- include/linux/ftrace_irq.h | 13 +++++++++++++ include/linux/hardirq.h | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 include/linux/ftrace_irq.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0ad1b48aea69..1b340e3fa249 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -104,9 +104,6 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); - #else # define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) @@ -114,8 +111,6 @@ extern void ftrace_nmi_exit(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h new file mode 100644 index 000000000000..b1299d6729f2 --- /dev/null +++ b/include/linux/ftrace_irq.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_FTRACE_IRQ_H +#define _LINUX_FTRACE_IRQ_H + + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + +#endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ffc16ab5a878..89a56d79e4c6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 
00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Nov 2008 17:50:21 -0500 Subject: jbd2: Call journal commit callback without holding j_list_lock Avoid freeing the transaction in __jbd2_journal_drop_transaction() so the journal commit callback can run without holding j_list_lock, to avoid lock contention on this spinlock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 004c9a8d63ed..9d82084a1605 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1179,8 +1179,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); void __jbd2_log_wait_for_space(journal_t *journal); -extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); -extern int jbd2_cleanup_journal_tail(journal_t *); +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); +extern int jbd2_cleanup_journal_tail(journal_t *); /* Debugging code only: */ -- cgit v1.2.3 From 307ba6dd73254fe7d2ce27db64ffd90e1bb3c6c0 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 7 Nov 2008 17:37:33 +0000 Subject: uwb: don't unbind the radio controller driver when resetting Use pre_reset and post_reset methods to avoid unbinding the radio controller driver after a uwb_rc_reset_all() call. This avoids a deadlock in uwb_rc_rm() when waiting for the uwb event thread to stop. 
Signed-off-by: David Vrabel --- include/linux/uwb.h | 2 ++ include/linux/uwb/umc.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 881f0c5b6d28..c4854848999d 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -540,6 +540,8 @@ void uwb_rc_rm(struct uwb_rc *); void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); +void uwb_rc_pre_reset(struct uwb_rc *rc); +void uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h index 36a39e34f8d7..4b4fc0f43855 100644 --- a/include/linux/uwb/umc.h +++ b/include/linux/uwb/umc.h @@ -89,6 +89,8 @@ struct umc_driver { void (*remove)(struct umc_dev *); int (*suspend)(struct umc_dev *, pm_message_t state); int (*resume)(struct umc_dev *); + int (*pre_reset)(struct umc_dev *); + int (*post_reset)(struct umc_dev *); struct device_driver driver; }; -- cgit v1.2.3 From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 7 Nov 2008 22:52:14 -0800 Subject: Revert "net: Guaranetee the proper ordering of the loopback device." This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695. --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..f1b0dbe58464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } -extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.2.3 From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 7 Nov 2008 22:54:20 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. v2 I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantees that the loopback device is the first device registered and the last network device to go away. But do it carefully so we register the loopback device after we clear dev_boot_phase. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.2.3 From f400923735ecbb67cbe4a3606c9479f694754f51 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 7 Nov 2008 22:56:00 -0800 Subject: pkt_sched: Control group classifier The classifier should cover the most common use case and will work without any special configuration. The principle of the classifier is to directly access the task_struct via get_current(). In order for this to work, classification requests from softirqs must be ignored.
This is not a problem because the vast majority of packets in softirq context are not assigned to a task anyway. For this to work, a mechanism is needed to trace softirq context. This repost goes back to the method of relying on the number of nested bh disable calls for the sake of not adding too much complexity and the option to come up with something more reliable if actually needed. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/cgroup_subsys.h | 6 ++++++ include/linux/pkt_cls.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9c22396e8b50..9c8d31bacf46 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -54,3 +54,9 @@ SUBSYS(freezer) #endif /* */ + +#ifdef CONFIG_NET_CLS_CGROUP +SUBSYS(net_cls) +#endif + +/* */ diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7cf7824df778..e6aa8482ad7a 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -394,6 +394,20 @@ enum #define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + +/* Cgroup classifier */ + +enum +{ + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr -- cgit v1.2.3 From 1239cd58d237fa6ad501acaec8776262a5784ec8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 Oct 2008 11:12:57 +0100 Subject: wireless: move mesh config length constant This is a constant from the 802.11 specification. Signed-off-by: Johannes Berg Cc: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aad99195a4cc..9dc288b920c8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -97,7 +97,10 @@ #define IEEE80211_MAX_FRAME_LEN 2352 #define IEEE80211_MAX_SSID_LEN 32 + #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_MESH_CONFIG_LEN 19 + #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 -- cgit v1.2.3 From 90c97a040d6b08cc4890328aa262fdc37336ab01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:22 +0200 Subject: nl80211: Add basic rate configuration for AP mode Add a new attribute, NL80211_ATTR_BSS_BASIC_RATES, that can be used with NL80211_CMD_SET_BSS for userspace (e.g., hostapd) to set which rates are in the basic rate set. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e4cc7869b22f..5009809588c0 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -243,6 +243,9 @@ enum nl80211_commands { * (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) + * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic + * rates in format defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). 
* * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -307,6 +310,8 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PARAMS, + NL80211_ATTR_BSS_BASIC_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -318,6 +323,7 @@ enum nl80211_attrs { * here */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY +#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 -- cgit v1.2.3 From 318884875bdddca663ecc373c813cf8e117d9e43 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:24 +0200 Subject: nl80211: Add TX queue parameter configuration Add a new attribute, NL80211_ATTR_WIPHY_TXQ_PARAMS, that can be used with NL80211_CMD_SET_WIPHY for userspace (e.g., hostapd) to set TX queue parameters (txop, cwmin, cwmax, aifs). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5009809588c0..79827345351d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -25,8 +25,9 @@ * * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. - * @NL80211_CMD_SET_WIPHY: set wiphy name, needs %NL80211_ATTR_WIPHY and - * %NL80211_ATTR_WIPHY_NAME. + * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME + * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. 
@@ -178,6 +179,7 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf. * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) + * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -312,6 +314,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, + NL80211_ATTR_WIPHY_TXQ_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -324,6 +328,7 @@ enum nl80211_attrs { */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES +#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -698,4 +703,38 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_txq_attr - TX queue parameter attributes + * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved + * @NL80211_TXQ_ATTR_QUEUE: TX queue identifier (NL80211_TXQ_Q_*) + * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning + * disabled + * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255] + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number + */ +enum nl80211_txq_attr { + __NL80211_TXQ_ATTR_INVALID, + NL80211_TXQ_ATTR_QUEUE, + NL80211_TXQ_ATTR_TXOP, + NL80211_TXQ_ATTR_CWMIN, + NL80211_TXQ_ATTR_CWMAX, + NL80211_TXQ_ATTR_AIFS, + + /* keep last */ + __NL80211_TXQ_ATTR_AFTER_LAST, + NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1 +}; + +enum 
nl80211_txq_q { + NL80211_TXQ_Q_VO, + NL80211_TXQ_Q_VI, + NL80211_TXQ_Q_BE, + NL80211_TXQ_Q_BK +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From fc6971d491517ba15e800540ff88caa55dc65b01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 19:59:05 +0200 Subject: mac80211_hwsim: Add support for client PS mode This introduces a debugfs file (ieee80211/phy#/hwsim/ps) that can be used to force a simulated radio into power save mode. Following values can be written into this file to change PS mode: 0 = power save disabled (constantly awake) 1 = power save enabled (drop all frames; do not send PS-Poll) 2 = power save enabled (send PS-Poll frames automatically to receive buffered unicast frames); not yet fully implemented 3 = manual PS-Poll trigger (send a single PS-Poll frame) Two different behavior for power save mode processing can be tested: - move between modes 1 and 0 (i.e., receive all buffered frames at a time) - move to mode 1 and use manual PS-Poll frames (write 3 to the 'ps' debugfs file) to fetch power save buffered frames one at a time Mode 2 (automatic PS-Poll) does not yet parse Beacon frames, but eventually, it should take a look at TIM IE and send PS-Poll if a traffic bit is set for our AID. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9dc288b920c8..56b0eb25d927 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -669,6 +669,13 @@ struct ieee80211_cts { u8 ra[6]; } __attribute__ ((packed)); +struct ieee80211_pspoll { + __le16 frame_control; + __le16 aid; + u8 bssid[6]; + u8 ta[6]; +} __attribute__ ((packed)); + /** * struct ieee80211_bar - HT Block Ack Request * -- cgit v1.2.3 From caf4b323b02a16c92fba449952ac6515ddc76d7a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 07:03:45 +0100 Subject: tracing, x86: add low level support for ftrace return tracing Impact: add infrastructure for function-return tracing Add low level support for ftrace return tracing. This plug-in stores return addresses on the thread_info structure of the current task. The index of the current return address is initialized when the task is the first one (init) and when a process forks (the child). It is not needed when a task does a sys_execve because after this syscall, it still needs to return on the kernel functions it called. Note that the code of return_to_handler has been suggested by Steven Rostedt as almost all of the ideas of improvements in this V3. For purpose of security, arch/x86/kernel/process_32.c is not traced because __switch_to() changes the current task during its execution. That could cause inconsistency in the stored return address of this function even if I didn't have any crash after testing with tracing on this function enabled. 
Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 20 ++++++++++++++++++++ include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 11 +++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f5608c11023..dcbbf72a88b1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -267,6 +267,26 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure that defines a return function trace. + */ +struct ftrace_retfunc { + unsigned long ret; /* Return address */ + unsigned long func; /* Current function */ + unsigned long long calltime; + unsigned long long rettime; +}; + +#ifdef CONFIG_FUNCTION_RET_TRACER +/* Type of a callback handler of tracing return function */ +typedef void (*trace_function_return_t)(struct ftrace_retfunc *); + +extern void register_ftrace_return(trace_function_return_t func); +/* The current handler in use */ +extern trace_function_return_t ftrace_function_return; +extern void unregister_ftrace_return(void); +#endif + /* * Structure which defines the trace of an initcall. 
* You don't have to fill the func field since it is diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index b1299d6729f2..0b4df55d7a74 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#ifdef CONFIG_DYNAMIC_FTRACE +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index 295b7c756ca6..df77abe860c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2005,6 +2005,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* + * When fork() creates a child process, this function is called. + * But the child task may not inherit the return adresses traced + * by the return function tracer because it will directly execute + * in userspace and will not return to kernel functions its parent + * used. + */ + task_thread_info(p)->curr_ret_stack = -1; +#endif } static inline unsigned long *end_of_stack(struct task_struct *p) -- cgit v1.2.3 From 9d36be76c55ad2c2bb29683b752b0d9ad2e4eeef Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:07 +1100 Subject: Document the order of arguments for cap_issubset. It's not instantly clear which order the argument should be in. So give an example. 
Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5bc145bd759a..b5750d0b96e0 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -457,6 +457,13 @@ static inline int cap_isclear(const kernel_cap_t a) return 1; } +/* + * Check if "a" is a subset of "set". + * return 1 if ALL of the capabilities in "a" are also in "set" + * cap_issubset(0101, 1111) will return 1 + * return 0 if ANY of the capabilities in "a" are not in "set" + * cap_issubset(1111, 0101) will return 0 + */ static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { kernel_cap_t dest; -- cgit v1.2.3 From c0b004413a46a0a5744e6d2b85220fe9d2c33d48 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:10 +1100 Subject: This patch adds a generic cpu endian caps structure and externally available functions which retrieve fcaps information from disk. This information is necessary so fcaps information can be collected and recorded by the audit system.
Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index b5750d0b96e0..d567af247ed8 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -99,6 +99,13 @@ typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; +/* exact same as vfs_cap_data but in cpu endian and always filled completely */ +struct cpu_vfs_cap_data { + __u32 magic_etc; + kernel_cap_t permitted; + kernel_cap_t inheritable; +}; + #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) -- cgit v1.2.3 From 851f7ff56d9c21272f289dd85fb3f1b6cf7a6e10 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:14 +1100 Subject: This patch will print cap_permitted and cap_inheritable data in the PATH records of any file that has file capabilities set. Files which do not have fcaps set will not have different PATH records. 
An example audit record if you run: setcap "cap_net_admin+pie" /bin/bash /bin/bash type=SYSCALL msg=audit(1225741937.363:230): arch=c000003e syscall=59 success=yes exit=0 a0=2119230 a1=210da30 a2=20ee290 a3=8 items=2 ppid=2149 pid=2923 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=EXECVE msg=audit(1225741937.363:230): argc=2 a0="ping" a1="www.google.com" type=CWD msg=audit(1225741937.363:230): cwd="/root" type=PATH msg=audit(1225741937.363:230): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0104755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fi=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225741937.363:230): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index d567af247ed8..0f1950181102 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -53,6 +53,7 @@ typedef struct __user_cap_data_struct { #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX #define VFS_CAP_REVISION_MASK 0xFF000000 +#define VFS_CAP_REVISION_SHIFT 24 #define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK #define VFS_CAP_FLAGS_EFFECTIVE 0x000001 @@ -534,6 +535,10 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); extern int capable(int cap); +/* audit system wants to get cap info from files as well */ +struct dentry; +extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ -- cgit v1.2.3 From 3fc689e96c0c90b6fede5946d6c31075e9464f69 Mon Sep 17 00:00:00 
2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:18 +1100 Subject: Any time fcaps or a setuid app under SECURE_NOROOT is used to result in a non-zero pE we will create a new audit record which contains the entire set of known information about the executable in question, fP, fI, fE, fversion and includes the process's pE, pI, pP. Before and after the bprm capability are applied. This record type will only be emitted from execve syscalls. An example of making ping use fcaps instead of setuid: setcap "cap_net_raw+pe" /bin/ping type=SYSCALL msg=audit(1225742021.015:236): arch=c000003e syscall=59 success=yes exit=0 a0=1457f30 a1=14606b0 a2=1463940 a3=321b770a70 items=2 ppid=2929 pid=2963 auid=0 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1321] msg=audit(1225742021.015:236): fver=2 fp=0000000000002000 fi=0000000000000000 fe=1 old_pp=0000000000000000 old_pi=0000000000000000 old_pe=0000000000000000 new_pp=0000000000002000 new_pi=0000000000000000 new_pe=0000000000002000 type=EXECVE msg=audit(1225742021.015:236): argc=2 a0="ping" a1="127.0.0.1" type=CWD msg=audit(1225742021.015:236): cwd="/home/test" type=PATH msg=audit(1225742021.015:236): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225742021.015:236): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43c..8cfb9feb2a05 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -99,6 +99,7 @@
#define AUDIT_OBJ_PID 1318 /* ptrace target */ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ +#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -452,6 +453,7 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); +extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -501,6 +503,29 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return __audit_mq_getsetattr(mqdes, mqstat); return 0; } + +/* + * ieieeeeee, an audit function without a return code! + * + * This function might fail! I decided that it didn't matter. We are too late + * to fail the syscall and the information isn't REQUIRED for any purpose. It's + * just nice to have. We should be able to look at past audit logs to figure + * out this process's current cap set along with the fcaps from the PATH record + * and use that to come up with the final set. Yeah, its ugly, but all the info + * is still in the audit log. So I'm not going to bother mentioning we failed + * if we couldn't allocate memory. + * + * If someone changes their mind they could create the aux record earlier and + * then search here and use that earlier allocation. But I don't wanna. 
+ * + * -Eric + */ +static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +{ + if (unlikely(!audit_dummy_context())) + __audit_log_bprm_fcaps(bprm, pP, pE); +} + extern int audit_n_rules; extern int audit_signals; #else @@ -532,6 +557,7 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) +#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 -- cgit v1.2.3 From e68b75a027bb94066576139ee33676264f867b87 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:22 +1100 Subject: When the capset syscall is used it is not possible for audit to record the actual capabilities being added/removed. This patch adds a new record type which emits the target pid and the eff, inh, and perm cap sets. example output if you audit capset syscalls would be: type=SYSCALL msg=audit(1225743140.465:76): arch=c000003e syscall=126 success=yes exit=0 a0=17f2014 a1=17f201c a2=80000000 a3=7fff2ab7f060 items=0 ppid=2160 pid=2223 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="setcap" exe="/usr/sbin/setcap" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1322] msg=audit(1225743140.465:76): pid=0 cap_pi=ffffffffffffffff cap_pp=ffffffffffffffff cap_pe=ffffffffffffffff Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8cfb9feb2a05..6fbebac7b1bf 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -100,6 +100,7 @@ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ #define AUDIT_BPRM_FCAPS 1321 /* Information about 
fcaps increasing perms */ +#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -454,6 +455,7 @@ extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __u extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); +extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -526,6 +528,13 @@ static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t __audit_log_bprm_fcaps(bprm, pP, pE); } +static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +{ + if (unlikely(!audit_dummy_context())) + return __audit_log_capset(pid, eff, inh, perm); + return 0; +} + extern int audit_n_rules; extern int audit_signals; #else @@ -558,6 +567,7 @@ extern int audit_signals; #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) #define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) +#define audit_log_capset(pid, e, i, p) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 -- cgit v1.2.3 From 06112163f5fd9e491a7f810443d81efa9d88e247 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 22:02:50 +1100 Subject: Add a new capable interface that will be used by systems that use audit to make an A or B type decision instead of a security decision. 
Currently this is the case at least for filesystems when deciding if a process can use the reserved 'root' blocks and for the case of things like the oom algorithm determining if processes are root processes and should be less likely to be killed. These types of security system requests should not be audited or logged since they are not really security decisions. It would be possible to solve this problem like the vm_enough_memory security check did by creating a new LSM interface and moving all of the policy into that interface but that proves to needlessly bloat the LSM and provide complex indirection. This merely allows those decisions to be made where they belong and to not flood logs or printk with denials for things that are not security decisions. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/capability.h | 3 +++ include/linux/security.h | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 0f1950181102..b313ba1dd5d1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -521,6 +521,8 @@ extern const kernel_cap_t __cap_init_eff_set; kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); +extern int security_capable(struct task_struct *t, int cap); +extern int security_capable_noaudit(struct task_struct *t, int cap); /** * has_capability - Determine if a task has a superior capability available * @t: The task in question @@ -532,6 +534,7 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); * Note that this does not set PF_SUPERPRIV on the task. 
*/ #define has_capability(t, cap) (security_capable((t), (cap)) == 0) +#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index c13f1cec9abb..5fe28a671cd3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -37,6 +37,10 @@ /* Maximum number of letters for an LSM name string */ #define SECURITY_NAME_MAX 10 +/* If capable should audit the security request */ +#define SECURITY_CAP_NOAUDIT 0 +#define SECURITY_CAP_AUDIT 1 + struct ctl_table; struct audit_krule; @@ -44,7 +48,7 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap); +extern int cap_capable(struct task_struct *tsk, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1307,7 +1311,7 @@ struct security_operations { kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap); + int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1577,6 +1581,7 @@ void security_capset_set(struct task_struct *target, kernel_cap_t *inheritable, kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); +int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1782,7 +1787,12 @@ static inline void security_capset_set(struct task_struct *target, static inline 
int security_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap); + return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); +} + +static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +{ + return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); } static inline int security_acct(struct file *file) -- cgit v1.2.3 From 50ee91765e25e7967a7b69cd5cc2bcab85e2eeb8 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Tue, 11 Nov 2008 18:13:23 +0530 Subject: sched/rt: removed unneeded definition Impact: cleanup This function no longer exists, so remove the definition. Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c6bfb34d978e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -258,8 +258,6 @@ static inline int select_nohz_load_balancer(int cpu) } #endif -extern unsigned long rt_needs_cpu(int cpu); - /* * Only dump TASK_* tasks. (0 for all tasks) */ -- cgit v1.2.3 From d90ebcbfa7f5a8b4e20518c9f94c5c4e4cd3c2e5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 12 Nov 2008 00:47:26 -0800 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. 
This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1fb023..d3ac1bde60b4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 3f5ec13696fd4a33bde42f385406cbb1d3cc96fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 23:21:31 +0100 Subject: tracing/fastboot: move boot tracer structs and funcs into their own header. Impact: Cleanups on the boot tracer and ftrace This patch brings some cleanups to the boot tracer headers. The functions and structures of this tracer have nothing related to ftrace and so should have their own header file. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dcbbf72a88b1..4fbc4a8b86a5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -287,45 +287,4 @@ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); #endif -/* - * Structure which defines the trace of an initcall. - * You don't have to fill the func field since it is - * only used internally by the tracer. 
- */ -struct boot_trace { - pid_t caller; - char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; -}; - -#ifdef CONFIG_BOOT_TRACER -/* Append the trace on the ring-buffer */ -extern void trace_boot(struct boot_trace *it, initcall_t fn); - -/* Tells the tracer that smp_pre_initcall is finished. - * So we can start the tracing - */ -extern void start_boot_trace(void); - -/* Resume the tracing of other necessary events - * such as sched switches - */ -extern void enable_boot_trace(void); - -/* Suspend this tracing. Actually, only sched_switches tracing have - * to be suspended. Initcalls doesn't need it.) - */ -extern void disable_boot_trace(void); -#else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } -static inline void start_boot_trace(void) { } -static inline void enable_boot_trace(void) { } -static inline void disable_boot_trace(void) { } -#endif - - - #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 92a77aac9812d5397abbe6f1920e085e50838635 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 12 Nov 2008 21:20:00 +1100 Subject: security: remove broken and useless declarations Remove broken declarations for security_capable* functions, which were not needed anyway. 
Signed-off-by: James Morris --- include/linux/capability.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index b313ba1dd5d1..7f26580a5a4d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -521,8 +521,6 @@ extern const kernel_cap_t __cap_init_eff_set; kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); -extern int security_capable(struct task_struct *t, int cap); -extern int security_capable_noaudit(struct task_struct *t, int cap); /** * has_capability - Determine if a task has a superior capability available * @t: The task in question -- cgit v1.2.3 From 1f0d69a9fc815db82f15722bf05227190b1d714d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:14:39 -0500 Subject: tracing: profile likely and unlikely annotations Impact: new unlikely/likely profiler Andrew Morton recently suggested having an in-kernel way to profile likely and unlikely macros. This patch achieves that goal. When configured, every(*) likely and unlikely macro gets a counter attached to it. When the condition is hit, the hit and misses of that condition are recorded. These numbers can later be retrieved by: /debugfs/tracing/profile_likely - All likely markers /debugfs/tracing/profile_unlikely - All unlikely markers. 
# cat /debug/tracing/profile_unlikely | head correct incorrect % Function File Line ------- --------- - -------- ---- ---- 2167 0 0 do_arch_prctl process_64.c 832 0 0 0 do_arch_prctl process_64.c 804 2670 0 0 IS_ERR err.h 34 71230 5693 7 __switch_to process_64.c 673 76919 0 0 __switch_to process_64.c 639 43184 33743 43 __switch_to process_64.c 624 12740 64181 83 __switch_to process_64.c 594 12740 64174 83 __switch_to process_64.c 590 # cat /debug/tracing/profile_unlikely | \ awk '{ if ($3 > 25) print $0; }' |head -20 44963 35259 43 __switch_to process_64.c 624 12762 67454 84 __switch_to process_64.c 594 12762 67447 84 __switch_to process_64.c 590 1478 595 28 syscall_get_error syscall.h 51 0 2821 100 syscall_trace_leave ptrace.c 1567 0 1 100 native_smp_prepare_cpus smpboot.c 1237 86338 265881 75 calc_delta_fair sched_fair.c 408 210410 108540 34 calc_delta_mine sched.c 1267 0 54550 100 sched_info_queued sched_stats.h 222 51899 66435 56 pick_next_task_fair sched_fair.c 1422 6 10 62 yield_task_fair sched_fair.c 982 7325 2692 26 rt_policy sched.c 144 0 1270 100 pre_schedule_rt sched_rt.c 1261 1268 48073 97 pick_next_task_rt sched_rt.c 884 0 45181 100 sched_info_dequeued sched_stats.h 177 0 15 100 sched_move_task sched.c 8700 0 15 100 sched_move_task sched.c 8690 53167 33217 38 schedule sched.c 4457 0 80208 100 sched_info_switch sched_stats.h 270 30585 49631 61 context_switch sched.c 2619 # cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }' 39900 36577 47 pick_next_task sched.c 4397 20824 15233 42 switch_mm mmu_context_64.h 18 0 7 100 __cancel_work_timer workqueue.c 560 617 66484 99 clocksource_adjust timekeeping.c 456 0 346340 100 audit_syscall_exit auditsc.c 1570 38 347350 99 audit_get_context auditsc.c 732 0 345244 100 audit_syscall_entry auditsc.c 1541 38 1017 96 audit_free auditsc.c 1446 0 1090 100 audit_alloc auditsc.c 862 2618 1090 29 audit_alloc auditsc.c 858 0 6 100 move_masked_irq migration.c 9 1 198 99 probe_sched_wakeup trace_sched_switch.c 
58 2 2 50 probe_wakeup trace_sched_wakeup.c 227 0 2 100 probe_wakeup_sched_switch trace_sched_wakeup.c 144 4514 2090 31 __grab_cache_page filemap.c 2149 12882 228786 94 mapping_unevictable pagemap.h 50 4 11 73 __flush_cpu_slab slub.c 1466 627757 330451 34 slab_free slub.c 1731 2959 61245 95 dentry_lru_del_init dcache.c 153 946 1217 56 load_elf_binary binfmt_elf.c 904 102 82 44 disk_put_part genhd.h 206 1 1 50 dst_gc_task dst.c 82 0 19 100 tcp_mss_split_point tcp_output.c 1126 As you can see by the above, there's a bit of work to do in rethinking the use of some unlikelys and likelys. Note: the unlikely case had 71 hits that were more than 25%. Note: After submitting my first version of this patch, Andrew Morton showed me a version written by Daniel Walker, from which I picked up the following ideas: 1) Using __builtin_constant_p to avoid profiling fixed values. 2) Using __FILE__ instead of instruction pointers. 3) Using the preprocessor to stop all profiling of likely annotations from vsyscall_64.c. Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feedback on this patch. (*) Not every unlikely is recorded; those that are used by vsyscalls (a few of them) had to have profiling disabled. 
Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Theodore Tso Cc: Arjan van de Ven Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 98115d9d04da..935e30cfaf3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. 
This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier -- cgit v1.2.3 From e25cf3db560e803292946ef23a30c69e341ce56f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 15:55:07 +0200 Subject: lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this warning: net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used this is a lockdep macro problem in the !LOCKDEP case. We cannot convert it to an inline because the macro works on multiple types, but we can mark the parameter used. [ also clean up a misaligned tab in sock_lock_init_class_and_name() ] [ also remove #ifdefs from around af_family_clock_key strings - which were certainly added to get rid of the ugly build warnings. 
] Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fc9f8e88123b..8956daf64abd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -336,10 +336,11 @@ static inline void lockdep_on(void) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ - do { (void)(key); } while (0) + do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) -- cgit v1.2.3 From 2b7d0390a6d6d595f43ea3806639664afe5b9ebe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 13:17:38 +0100 Subject: tracing: branch tracer, fix vdso crash Impact: fix bootup crash the branch tracer missed arch/x86/vdso/vclock_gettime.c from disabling tracing, which caused such bootup crashes: [ 201.840097] init[1]: segfault at 7fffed3fe7c0 ip 00007fffed3fea2e sp 000077 also clean up the ugly ifdefs in arch/x86/kernel/vsyscall_64.c by creating DISABLE_UNLIKELY_PROFILE facility for code to turn off instrumentation on a per file basis. 
Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 935e30cfaf3c..63b7d9089d6e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,7 +59,11 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +/* + * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) struct ftrace_likely_data { const char *func; const char *file; -- cgit v1.2.3 From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: trace: rename unlikely profiler to branch profiler Impact: name change of unlikely tracer and profiler Ingo Molnar suggested changing the config from UNLIKELY_PROFILE to BRANCH_PROFILING. I never did like the "unlikely" name so I went one step farther, and renamed all the unlikely configurations to a "BRANCH" variant. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 63b7d9089d6e..c7d804a7a4d6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,26 +59,27 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -/* - * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code - * to disable branch tracing on a per file basis. 
- */ -#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) -struct ftrace_likely_data { +struct ftrace_branch_data { const char *func; const char *file; unsigned line; unsigned long correct; unsigned long incorrect; }; -void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +/* + * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) #define likely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_likely"))) \ ______f = { \ @@ -93,7 +94,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); }) #define unlikely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_unlikely"))) \ ______f = { \ -- cgit v1.2.3 From da9592edebceeba1b9301beafe80ec8b9c2db0ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:05 +1100 Subject: CRED: Wrap task credential accesses in the filesystem subsystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. 
Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: Al Viro Signed-off-by: James Morris --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4b..b3d404aaabed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1193,7 +1193,7 @@ enum { #define has_fs_excl() atomic_read(&current->fs_excl) #define is_owner_or_cap(inode) \ - ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) + ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) /* not quite ready to be deprecated, but... */ extern void lock_super(struct super_block *); -- cgit v1.2.3 From e9e349b051d98799b743ebf248cc2d986fedf090 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:13 +1100 Subject: KEYS: Disperse linux/key-ui.h Disperse the bits of linux/key-ui.h as the reason they were put here (keyfs) didn't get in. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/key-ui.h | 66 -------------------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 include/linux/key-ui.h (limited to 'include/linux') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h deleted file mode 100644 index e8b8a7a5c496..000000000000 --- a/include/linux/key-ui.h +++ /dev/null @@ -1,66 +0,0 @@ -/* key-ui.h: key userspace interface stuff - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _LINUX_KEY_UI_H -#define _LINUX_KEY_UI_H - -#include <linux/key.h> - -/* the key tree */ -extern struct rb_root key_serial_tree; -extern spinlock_t key_serial_lock; - -/* required permissions */ -#define KEY_VIEW 0x01 /* require permission to view attributes */ -#define KEY_READ 0x02 /* require permission to read content */ -#define KEY_WRITE 0x04 /* require permission to update / modify */ -#define KEY_SEARCH 0x08 /* require permission to search (keyring) or find (key) */ -#define KEY_LINK 0x10 /* require permission to link */ -#define KEY_SETATTR 0x20 /* require permission to change attributes */ -#define KEY_ALL 0x3f /* all the above permissions */ - -/* - * the keyring payload contains a list of the keys to which the keyring is - * subscribed - */ -struct keyring_list { - struct rcu_head rcu; /* RCU deletion hook */ - unsigned short maxkeys; /* max keys this list can hold */ - unsigned short nkeys; /* number of keys currently held */ - unsigned short delkey; /* key to be unlinked by RCU */ - struct key *keys[0]; -}; - -/* - * check to see whether permission is granted to use a key in the desired way - */ -extern int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - key_perm_t perm); - -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - return key_task_permission(key_ref, current, perm); -} - -extern key_ref_t lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, - key_perm_t perm); - -extern long join_session_keyring(const char *name); - -extern struct key_type *key_type_lookup(const char *type); -extern void key_type_put(struct key_type *ktype); - -#define key_negative_timeout 60 /* default timeout on a negative key's existence */ - - -#endif /* _LINUX_KEY_UI_H */ -- cgit v1.2.3 From 8bbf4976b59fc9fc2861e79cab7beb3f6d647640 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: KEYS: Alter use of key instantiation link-to-keyring 
argument Alter the use of the key instantiation and negation functions' link-to-keyring arguments. Currently this specifies a keyring in the target process to link the key into, creating the keyring if it doesn't exist. This, however, can be a problem for copy-on-write credentials as it means that the instantiating process can alter the credentials of the requesting process. This patch alters the behaviour such that: (1) If keyctl_instantiate_key() or keyctl_negate_key() are given a specific keyring by ID (ringid >= 0), then that keyring will be used. (2) If keyctl_instantiate_key() or keyctl_negate_key() are given one of the special constants that refer to the requesting process's keyrings (KEY_SPEC_*_KEYRING, all <= 0), then: (a) If sys_request_key() was given a keyring to use (destringid) then the key will be attached to that keyring. (b) If sys_request_key() was given a NULL keyring, then the key being instantiated will be attached to the default keyring as set by keyctl_set_reqkey_keyring(). (3) No extra link will be made. Decision point (1) follows current behaviour, and allows those instantiators who've searched for a specifically named keyring in the requestor's keyring so as to partition the keys by type to still have their named keyrings. Decision point (2) allows the requestor to make sure that the key or keys that get produced by request_key() go where they want, whilst allowing the instantiator to request that the key is retained. 
This is mainly useful for situations where the instantiator makes a secondary request, the key for which should be retained by the initial requestor: +-----------+ +--------------+ +--------------+ | | | | | | | Requestor |------->| Instantiator |------->| Instantiator | | | | | | | +-----------+ +--------------+ +--------------+ request_key() request_key() This might be useful, for example, in Kerberos, where the requestor requests a ticket, and then the ticket instantiator requests the TGT, which someone else then has to go and fetch. The TGT, however, should be retained in the keyrings of the requestor, not the first instantiator. To make this explicit an extra special keyring constant is also added. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/key.h | 16 ++++++++-------- include/linux/keyctl.h | 4 +++- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 1b70e35a71e3..df709e1af3cd 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -287,11 +287,11 @@ extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(tsk, keyring) \ -({ \ - struct key *old_session = tsk->signal->session_keyring; \ - tsk->signal->session_keyring = keyring; \ - old_session; \ +#define __install_session_keyring(keyring) \ +({ \ + struct key *old_session = current->signal->session_keyring; \ + current->signal->session_keyring = keyring; \ + old_session; \ }) #else /* CONFIG_KEYS */ @@ -302,11 +302,11 @@ extern void key_init(void); #define key_revoke(k) do { } while(0) #define key_put(k) do { } while(0) #define key_ref_put(k) do { } while(0) -#define make_key_ref(k, p) ({ NULL; }) -#define key_ref_to_ptr(k) ({ NULL; }) +#define make_key_ref(k, p) NULL +#define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 #define 
switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(t, k) ({ NULL; }) +#define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 #define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 656ee6b77a4a..c0688eb72093 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -1,6 +1,6 @@ /* keyctl.h: keyctl command IDs * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -20,6 +20,7 @@ #define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ #define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ #define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */ +#define KEY_SPEC_REQUESTOR_KEYRING -8 /* - key ID for request_key() dest keyring */ /* request-key default keyrings */ #define KEY_REQKEY_DEFL_NO_CHANGE -1 @@ -30,6 +31,7 @@ #define KEY_REQKEY_DEFL_USER_KEYRING 4 #define KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 #define KEY_REQKEY_DEFL_GROUP_KEYRING 6 +#define KEY_REQKEY_DEFL_REQUESTOR_KEYRING 7 /* keyctl commands */ #define KEYCTL_GET_KEYRING_ID 0 /* ask for a keyring's ID */ -- cgit v1.2.3 From 1cdcbec1a3372c0c49c59d292e708fd07b509f18 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: CRED: Neuter sys_capset() Take away the ability for sys_capset() to affect processes other than current. This means that current will not need to lock its own credentials when reading them against interference by other processes. This has effectively been the case for a while anyway, since: (1) Without LSM enabled, sys_capset() is disallowed. (2) With file-based capabilities, sys_capset() is neutered. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: Andrew G. 
Morgan Acked-by: James Morris Signed-off-by: James Morris --- include/linux/security.h | 48 ++++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 5fe28a671cd3..d1ce8beddbd7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,8 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); @@ -1191,24 +1191,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if the capability sets were successfully obtained. * @capset_check: * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the @target process. - * Caveat: @target is also set to current if a set of processes is - * specified (i.e. all processes other than current and init or a - * particular process group). Hence, the capset_set hook may need to - * revalidate permission to the actual target process. 
- * @target contains the task_struct structure for target process. + * @permitted capability sets for the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if permission is granted. * @capset_set: * Set the @effective, @inheritable, and @permitted capability sets for - * the @target process. Since capset_check cannot always check permission - * to the real @target process, this hook may also perform permission - * checking to determine if the current process is allowed to set the - * capability sets of the @target process. However, this hook has no way - * of returning an error due to the structure of the sys_capset code. - * @target contains the task_struct structure for target process. + * the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. 
@@ -1303,12 +1293,10 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (struct task_struct *target, - kernel_cap_t *effective, + int (*capset_check) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - void (*capset_set) (struct task_struct *target, - kernel_cap_t *effective, + void (*capset_set) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); @@ -1572,12 +1560,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, +int security_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, +void security_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); @@ -1769,20 +1755,18 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline int security_capset_check(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - return cap_capset_check(target, effective, inheritable, permitted); + return cap_capset_check(effective, inheritable, permitted); } -static inline void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline void security_capset_set(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t 
*permitted) { - cap_capset_set(target, effective, inheritable, permitted); + cap_capset_set(effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) -- cgit v1.2.3 From 15a2460ed0af7538ca8e6c610fe607a2cd9da142 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:15 +1100 Subject: CRED: Constify the kernel_cap_t arguments to the capset LSM hooks Constify the kernel_cap_t arguments to the capset LSM hooks. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: James Morris Signed-off-by: James Morris --- include/linux/security.h | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index d1ce8beddbd7..9f305d4a31a7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,12 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); +extern void cap_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); @@ -1293,12 +1297,12 @@ struct security_operations { int (*capget) (struct 
task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); - void (*capset_set) (kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); + int (*capset_check) (const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); + void (*capset_set) (const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1560,12 +1564,12 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); -void security_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); +int security_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); +void security_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1755,16 +1759,16 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline int security_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { return cap_capset_check(effective, inheritable, permitted); } -static inline void 
security_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline void security_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { cap_capset_set(effective, inheritable, permitted); } -- cgit v1.2.3 From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 155 ++++++++++++++++++++++++++++++++++++++------- include/linux/init_task.h | 24 +++++-- include/linux/sched.h | 52 ++------------- include/linux/securebits.h | 2 +- 4 files changed, 155 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index b69222cc1fd2..3e65587a72e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,39 +12,150 @@ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H -#define get_current_user() (get_uid(current->user)) - -#define task_uid(task) ((task)->uid) -#define task_gid(task) ((task)->gid) -#define task_euid(task) ((task)->euid) -#define task_egid(task) ((task)->egid) - -#define current_uid() (current->uid) -#define current_gid() (current->gid) -#define current_euid() (current->euid) -#define current_egid() (current->egid) -#define current_suid() (current->suid) -#define current_sgid() (current->sgid) -#define current_fsuid() (current->fsuid) -#define current_fsgid() (current->fsgid) -#define current_cap() (current->cap_effective) +#include 
+#include +#include + +struct user_struct; +struct cred; + +/* + * COW Supplementary groups list + */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) + +struct group_info { + atomic_t usage; + int ngroups; + int nblocks; + gid_t small_block[NGROUPS_SMALL]; + gid_t *blocks[0]; +}; + +/** + * get_group_info - Get a reference to a group info structure + * @group_info: The group info to reference + * + * This must be called with the owning task locked (via task_lock()) when task + * != current. The reason being that the vast majority of callers are looking + * at current->group_info, which can not be changed except by the current task. + * Changing current->group_info requires the task lock, too. + */ +#define get_group_info(group_info) \ +do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +/** + * put_group_info - Release a reference to a group info structure + * @group_info: The group info to release + */ +#define put_group_info(group_info) \ +do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +extern struct group_info *groups_alloc(int); +extern void groups_free(struct group_info *); +extern int set_current_groups(struct group_info *); +extern int set_groups(struct cred *, struct group_info *); +extern int groups_search(struct group_info *, gid_t); + +/* access the groups "array" with this macro */ +#define GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +/* + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. 
+ * + * Note that some members of this structure belong to both categories - the + * LSM security pointer for instance. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. + */ +struct cred { + atomic_t usage; + uid_t uid; /* real UID of the task */ + gid_t gid; /* real GID of the task */ + uid_t suid; /* saved UID of the task */ + gid_t sgid; /* saved GID of the task */ + uid_t euid; /* effective UID of the task */ + gid_t egid; /* effective GID of the task */ + uid_t fsuid; /* UID for VFS ops */ + gid_t fsgid; /* GID for VFS ops */ + unsigned securebits; /* SUID-less security management */ + kernel_cap_t cap_inheritable; /* caps our children can inherit */ + kernel_cap_t cap_permitted; /* caps we're permitted */ + kernel_cap_t cap_effective; /* caps we can actually use */ + kernel_cap_t cap_bset; /* capability bounding set */ +#ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested + * keys to */ + struct key *thread_keyring; /* keyring private to this thread */ + struct key *request_key_auth; /* assumed request_key authority */ +#endif +#ifdef CONFIG_SECURITY + void *security; /* subjective LSM security */ +#endif + struct user_struct *user; /* real user ID subscription */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ + struct rcu_head rcu; /* RCU deletion hook */ + spinlock_t lock; /* lock for pointer changes */ +}; + +#define get_current_user() (get_uid(current->cred->user)) + +#define task_uid(task) ((task)->cred->uid) +#define task_gid(task) ((task)->cred->gid) 
+#define task_euid(task) ((task)->cred->euid) +#define task_egid(task) ((task)->cred->egid) + +#define current_uid() (current->cred->uid) +#define current_gid() (current->cred->gid) +#define current_euid() (current->cred->euid) +#define current_egid() (current->cred->egid) +#define current_suid() (current->cred->suid) +#define current_sgid() (current->cred->sgid) +#define current_fsuid() (current->cred->fsuid) +#define current_fsgid() (current->cred->fsgid) +#define current_cap() (current->cred->cap_effective) #define current_uid_gid(_uid, _gid) \ do { \ - *(_uid) = current->uid; \ - *(_gid) = current->gid; \ + *(_uid) = current->cred->uid; \ + *(_gid) = current->cred->gid; \ } while(0) #define current_euid_egid(_uid, _gid) \ do { \ - *(_uid) = current->euid; \ - *(_gid) = current->egid; \ + *(_uid) = current->cred->euid; \ + *(_gid) = current->cred->egid; \ } while(0) #define current_fsuid_fsgid(_uid, _gid) \ do { \ - *(_uid) = current->fsuid; \ - *(_gid) = current->fsgid; \ + *(_uid) = current->cred->fsuid; \ + *(_gid) = current->cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..9de41ccd67b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,21 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +extern struct cred init_cred; + +#define INIT_CRED(p) \ +{ \ + .usage = ATOMIC_INIT(3), \ + .securebits = SECUREBITS_DEFAULT, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_bset = CAP_INIT_BSET, \ + .user = INIT_USER, \ + .group_info = &init_groups, \ + .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ +} + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -147,13 +162,8 @@ extern struct group_info init_groups; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .group_info = &init_groups, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .securebits = SECUREBITS_DEFAULT, \ - .user = INIT_USER, \ + .__temp_cred = INIT_CRED(tsk.__temp_cred), \ + .cred = &tsk.__temp_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c8b92502354d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) + struct backing_dev_info; struct reclaim_state; @@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new, #endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. 
- */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); @@ -1181,17 +1151,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif + struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ + struct cred *cred; /* actual/objective task credentials */ + char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1228,9 +1190,6 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; @@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk, extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); -extern int 
in_group_p(gid_t); -extern int in_egroup_p(gid_t); - extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 92f09bdf1175..6d389491bfa2 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->securebits) +#define issecure(X) (issecure_mask(X) & current->cred->securebits) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v1.2.3 From f1752eec6145c97163dbce62d17cf5d928e28a27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:17 +1100 Subject: CRED: Detach the credentials from task_struct Detach the credentials from task_struct, duplicating them in copy_process() and releasing them in __put_task_struct(). Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 29 +++++++++++++++++++++++++++++ include/linux/init_task.h | 16 +--------------- include/linux/sched.h | 1 - include/linux/security.h | 26 +++++++++++++------------- 4 files changed, 43 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3e65587a72e5..a7a686074cb0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -158,4 +158,33 @@ do { \ *(_gid) = current->cred->fsgid; \ } while(0) +extern void __put_cred(struct cred *); +extern int copy_creds(struct task_struct *, unsigned long); + +/** + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. 
+ */ +static inline struct cred *get_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} + +/** + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. + */ +static inline void put_cred(struct cred *cred) +{ + if (atomic_dec_and_test(&(cred)->usage)) + __put_cred(cred); +} + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9de41ccd67b5..5e24c54b6dfd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -115,19 +115,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#define INIT_CRED(p) \ -{ \ - .usage = ATOMIC_INIT(3), \ - .securebits = SECUREBITS_DEFAULT, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .user = INIT_USER, \ - .group_info = &init_groups, \ - .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ -} - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -162,8 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .__temp_cred = INIT_CRED(tsk.__temp_cred), \ - .cred = &tsk.__temp_cred, \ + .cred = &init_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8b92502354d..740cf946c8cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ struct cred *cred; /* actual/objective task credentials */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/security.h b/include/linux/security.h index 9f305d4a31a7..9239cc11eb9c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -593,15 +593,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @task_alloc_security: - * @p contains the task_struct for child process. - * Allocate and attach a security structure to the p->security field. The - * security field is initialized to NULL when the task structure is + * @cred_alloc_security: + * @cred contains the cred struct for child process. + * Allocate and attach a security structure to the cred->security field. + * The security field is initialized to NULL when the task structure is * allocated. * Return 0 if operation was successful. - * @task_free_security: - * @p contains the task_struct for process. - * Deallocate and clear the p->security field. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. 
* @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1405,8 +1405,8 @@ struct security_operations { int (*dentry_open) (struct file *file); int (*task_create) (unsigned long clone_flags); - int (*task_alloc_security) (struct task_struct *p); - void (*task_free_security) (struct task_struct *p); + int (*cred_alloc_security) (struct cred *cred); + void (*cred_free) (struct cred *cred); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , uid_t old_euid, uid_t old_suid, int flags); @@ -1660,8 +1660,8 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file); int security_task_create(unsigned long clone_flags); -int security_task_alloc(struct task_struct *p); -void security_task_free(struct task_struct *p); +int security_cred_alloc(struct cred *cred); +void security_cred_free(struct cred *cred); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -2181,12 +2181,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_task_alloc(struct task_struct *p) +static inline int security_cred_alloc(struct cred *cred) { return 0; } -static inline void security_task_free(struct task_struct *p) +static inline void security_cred_free(struct cred *cred) { } static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, -- cgit v1.2.3 From 86a264abe542cfececb4df129bc45a0338d8cdb9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:18 +1100 Subject: CRED: Wrap current->cred and a few other accessors Wrap current->cred and a few other accessors to hide their actual implementation. 
Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 187 ++++++++++++++++++++++++++++++++++----------- include/linux/securebits.h | 2 +- 2 files changed, 144 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index a7a686074cb0..4221ec6000c1 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -37,15 +37,16 @@ struct group_info { * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * - * This must be called with the owning task locked (via task_lock()) when task - * != current. The reason being that the vast majority of callers are looking - * at current->group_info, which can not be changed except by the current task. - * Changing current->group_info requires the task lock, too. + * This gets a reference to a set of supplementary groups. + * + * If the caller is accessing a task's credentials, they must hold the RCU read + * lock when reading. 
*/ -#define get_group_info(group_info) \ -do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) +static inline struct group_info *get_group_info(struct group_info *gi) +{ + atomic_inc(&gi->usage); + return gi; +} /** * put_group_info - Release a reference to a group info structure @@ -61,7 +62,7 @@ extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); -extern int groups_search(struct group_info *, gid_t); +extern int groups_search(const struct group_info *, gid_t); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ @@ -123,41 +124,6 @@ struct cred { spinlock_t lock; /* lock for pointer changes */ }; -#define get_current_user() (get_uid(current->cred->user)) - -#define task_uid(task) ((task)->cred->uid) -#define task_gid(task) ((task)->cred->gid) -#define task_euid(task) ((task)->cred->euid) -#define task_egid(task) ((task)->cred->egid) - -#define current_uid() (current->cred->uid) -#define current_gid() (current->cred->gid) -#define current_euid() (current->cred->euid) -#define current_egid() (current->cred->egid) -#define current_suid() (current->cred->suid) -#define current_sgid() (current->cred->sgid) -#define current_fsuid() (current->cred->fsuid) -#define current_fsgid() (current->cred->fsgid) -#define current_cap() (current->cred->cap_effective) - -#define current_uid_gid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->uid; \ - *(_gid) = current->cred->gid; \ -} while(0) - -#define current_euid_egid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->euid; \ - *(_gid) = current->cred->egid; \ -} while(0) - -#define current_fsuid_fsgid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->fsuid; \ - *(_gid) = current->cred->fsgid; \ -} while(0) - extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); @@ -187,4 +153,137 @@ static inline void 
put_cred(struct cred *cred) __put_cred(cred); } +/** + * current_cred - Access the current task's credentials + * + * Access the credentials of the current task. + */ +#define current_cred() \ + (current->cred) + +/** + * __task_cred - Access another task's credentials + * @task: The task to query + * + * Access the credentials of another task. The caller must hold the + * RCU readlock. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define __task_cred(task) \ + ((const struct cred *)(rcu_dereference((task)->cred))) + +/** + * get_task_cred - Get another task's credentials + * @task: The task to query + * + * Get the credentials of a task, pinning them so that they can't go away. + * Accessing a task's credentials directly is not permitted. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define get_task_cred(task) \ +({ \ + struct cred *__cred; \ + rcu_read_lock(); \ + __cred = (struct cred *) __task_cred((task)); \ + get_cred(__cred); \ + rcu_read_unlock(); \ + __cred; \ +}) + +/** + * get_current_cred - Get the current task's credentials + * + * Get the credentials of the current task, pinning them so that they can't go + * away. Accessing the current task's credentials directly is not permitted. + */ +#define get_current_cred() \ + (get_cred(current_cred())) + +/** + * get_current_user - Get the current task's user_struct + * + * Get the user record of the current task, pinning it so that it can't go + * away. 
+ */ +#define get_current_user() \ +({ \ + struct user_struct *__u; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __u = get_uid(__cred->user); \ + __u; \ +}) + +/** + * get_current_groups - Get the current task's supplementary group list + * + * Get the supplementary group list of the current task, pinning it so that it + * can't go away. + */ +#define get_current_groups() \ +({ \ + struct group_info *__groups; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __groups = get_group_info(__cred->group_info); \ + __groups; \ +}) + +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(task->cred->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ +}) + +#define task_uid(task) (task_cred_xxx((task), uid)) +#define task_euid(task) (task_cred_xxx((task), euid)) + +#define current_cred_xxx(xxx) \ +({ \ + current->cred->xxx; \ +}) + +#define current_uid() (current_cred_xxx(uid)) +#define current_gid() (current_cred_xxx(gid)) +#define current_euid() (current_cred_xxx(euid)) +#define current_egid() (current_cred_xxx(egid)) +#define current_suid() (current_cred_xxx(suid)) +#define current_sgid() (current_cred_xxx(sgid)) +#define current_fsuid() (current_cred_xxx(fsuid)) +#define current_fsgid() (current_cred_xxx(fsgid)) +#define current_cap() (current_cred_xxx(cap_effective)) +#define current_user() (current_cred_xxx(user)) +#define current_security() (current_cred_xxx(security)) + +#define current_uid_gid(_uid, _gid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_uid) = __cred->uid; \ + *(_gid) = __cred->gid; \ +} while(0) + +#define current_euid_egid(_euid, _egid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_euid) = __cred->euid; \ + *(_egid) = __cred->egid; \ +} while(0) + +#define current_fsuid_fsgid(_fsuid, _fsgid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_fsuid) = 
__cred->fsuid; \ + *(_fsgid) = __cred->fsgid; \ +} while(0) + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 6d389491bfa2..d2c5ed845bcc 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->cred->securebits) +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v1.2.3 From c69e8d9c01db2adc503464993c358901c9af9de4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:19 +1100 Subject: CRED: Use RCU to access another task's creds and to release a task's own creds Use RCU to access another task's creds and to release a task's own creds. This means that it will be possible for the credentials of a task to be replaced without another task (a) requiring a full lock to read them, and (b) seeing deallocated memory. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4221ec6000c1..166ce4ddba64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -147,8 +147,9 @@ static inline struct cred *get_cred(struct cred *cred) * Release a reference to a set of credentials, deleting them when the last ref * is released. 
*/ -static inline void put_cred(struct cred *cred) +static inline void put_cred(const struct cred *_cred) { + struct cred *cred = (struct cred *) _cred; if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } -- cgit v1.2.3 From bb952bb98a7e479262c7eb25d5592545a3af147d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:20 +1100 Subject: CRED: Separate per-task-group keyrings from signal_struct Separate per-task-group keyrings from signal_struct and dangle their anchor from the cred struct rather than the signal_struct. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/cred.h | 16 ++++++++++++++++ include/linux/key.h | 8 ++------ include/linux/sched.h | 6 ------ 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 166ce4ddba64..62b9e532422d 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -71,6 +71,21 @@ extern int groups_search(const struct group_info *, gid_t); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); +/* + * The common credentials for a thread group + * - shared by CLONE_THREAD + */ +#ifdef CONFIG_KEYS +struct thread_group_cred { + atomic_t usage; + pid_t tgid; /* thread group process ID */ + spinlock_t lock; + struct key *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ + struct rcu_head rcu; /* RCU deletion hook */ +}; +#endif + /* * The security context of a task * @@ -114,6 +129,7 @@ struct cred { * keys to */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ + struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ diff --git a/include/linux/key.h b/include/linux/key.h index df709e1af3cd..0836cc838b0c 100644 --- 
a/include/linux/key.h +++ b/include/linux/key.h @@ -278,9 +278,7 @@ extern ctl_table key_sysctls[]; */ extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern int copy_thread_group_keys(struct task_struct *tsk); extern void exit_keys(struct task_struct *tsk); -extern void exit_thread_group_keys(struct signal_struct *tg); extern int suid_keys(struct task_struct *tsk); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); @@ -289,8 +287,8 @@ extern void key_init(void); #define __install_session_keyring(keyring) \ ({ \ - struct key *old_session = current->signal->session_keyring; \ - current->signal->session_keyring = keyring; \ + struct key *old_session = current->cred->tgcred->session_keyring; \ + current->cred->tgcred->session_keyring = keyring; \ old_session; \ }) @@ -308,9 +306,7 @@ extern void key_init(void); #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 -#define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) -#define exit_thread_group_keys(tg) do { } while(0) #define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 740cf946c8cc..2913252989b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -571,12 +571,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - /* keep the process-shared keyrings here so that they do the right - * thing in threads created with CLONE_THREAD */ -#ifdef CONFIG_KEYS - struct key *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ -#endif #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif -- cgit v1.2.3 From 
745ca2475a6ac596e3d8d37c2759c0fbe2586227 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:22 +1100 Subject: CRED: Pass credentials through dentry_open() Pass credentials through dentry_open() so that the COW creds patch can have SELinux's flush_unauthorized_files() pass the appropriate creds back to itself when it opens its null chardev. The security_dentry_open() call also now takes a creds pointer, as does the dentry_open hook in struct security_operations. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- include/linux/fs.h | 4 +++- include/linux/security.h | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b3d404aaabed..3bfec1327b8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ struct poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; +struct cred; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1673,7 +1674,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, + const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); diff --git a/include/linux/security.h b/include/linux/security.h index 9239cc11eb9c..7e9fe046a0d1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1402,7 +1402,7 @@ struct security_operations { int (*file_send_sigiotask) (struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive) (struct file *file); - int (*dentry_open) (struct file *file); + int (*dentry_open) (struct file *file, 
const struct cred *cred); int (*task_create) (unsigned long clone_flags); int (*cred_alloc_security) (struct cred *cred); @@ -1658,7 +1658,7 @@ int security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); -int security_dentry_open(struct file *file); +int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); @@ -2171,7 +2171,8 @@ static inline int security_file_receive(struct file *file) return 0; } -static inline int security_dentry_open(struct file *file) +static inline int security_dentry_open(struct file *file, + const struct cred *cred) { return 0; } -- cgit v1.2.3 From d84f4f992cbd76e8f39c488cf0c5d123843923b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:23 +1100 Subject: CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). 
With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may be incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. 
This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. 
(*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. 
(b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather than pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodehelper_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. 
The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- include/linux/audit.h | 22 ++++-- include/linux/capability.h | 2 - include/linux/cred.h | 44 ++++++++--- include/linux/init_task.h | 2 + include/linux/key.h | 22 +----- include/linux/sched.h | 6 +- include/linux/security.h | 178 +++++++++++++++++++++------------------------ 7 files changed, 139 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fbebac7b1bf..0b2fcb698a63 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -454,8 +454,10 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); -extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); -extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); +extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old); +extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -522,16 +524,20 @@ static inline int 
audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) * * -Eric */ -static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - __audit_log_bprm_fcaps(bprm, pP, pE); + return __audit_log_bprm_fcaps(bprm, new, old); + return 0; } -static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +static inline int audit_log_capset(pid_t pid, const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - return __audit_log_capset(pid, eff, inh, perm); + return __audit_log_capset(pid, new, old); return 0; } @@ -566,8 +572,8 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) -#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) -#define audit_log_capset(pid, e, i, p) ({ 0; }) +#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) +#define audit_log_capset(pid, ncr, ocr) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/include/linux/capability.h b/include/linux/capability.h index 7f26580a5a4d..e22f48c2a46f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -519,8 +519,6 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); - /** * has_capability - Determine if a task has a superior capability available * @t: The task in question diff --git a/include/linux/cred.h b/include/linux/cred.h index 62b9e532422d..eaf6fa695a04 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,6 +84,8 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this 
process */ struct rcu_head rcu; /* RCU deletion hook */ }; + +extern void release_tgcred(struct cred *cred); #endif /* @@ -137,11 +139,30 @@ struct cred { struct user_struct *user; /* real user ID subscription */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ - spinlock_t lock; /* lock for pointer changes */ }; extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); +extern struct cred *prepare_creds(void); +extern struct cred *prepare_usermodehelper_creds(void); +extern int commit_creds(struct cred *); +extern void abort_creds(struct cred *); +extern const struct cred *override_creds(const struct cred *) __deprecated; +extern void revert_creds(const struct cred *) __deprecated; +extern void __init cred_init(void); + +/** + * get_new_cred - Get a reference on a new set of credentials + * @cred: The new credentials to reference + * + * Get a reference on the specified set of new credentials. The caller must + * release the reference. + */ +static inline struct cred *get_new_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} /** * get_cred - Get a reference on a set of credentials @@ -150,10 +171,9 @@ extern int copy_creds(struct task_struct *, unsigned long); * Get a reference on the specified set of credentials. The caller must * release the reference. 
*/ -static inline struct cred *get_cred(struct cred *cred) +static inline const struct cred *get_cred(const struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred((struct cred *) cred); } /** @@ -166,6 +186,8 @@ static inline struct cred *get_cred(struct cred *cred) static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; + + BUG_ON(atomic_read(&(cred)->usage) <= 0); if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } @@ -250,13 +272,13 @@ static inline void put_cred(const struct cred *_cred) __groups; \ }) -#define task_cred_xxx(task, xxx) \ -({ \ - __typeof__(task->cred->xxx) ___val; \ - rcu_read_lock(); \ - ___val = __task_cred((task))->xxx; \ - rcu_read_unlock(); \ - ___val; \ +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(((struct cred *)NULL)->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5e24c54b6dfd..08c3b24ad9a8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,6 +150,8 @@ extern struct cred init_cred; .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ .cred = &init_cred, \ + .cred_exec_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/key.h b/include/linux/key.h index 0836cc838b0c..69ecf0934b02 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -73,6 +73,7 @@ struct key; struct seq_file; struct user_struct; struct signal_struct; +struct cred; struct key_type; struct key_owner; @@ -181,7 +182,7 @@ struct key { extern struct key *key_alloc(struct key_type *type, const char *desc, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, key_perm_t perm, unsigned long flags); @@ -249,7 +250,7 @@ extern int 
key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, unsigned long flags, struct key *dest); @@ -276,22 +277,12 @@ extern ctl_table key_sysctls[]; /* * the userspace interface */ -extern void switch_uid_keyring(struct user_struct *new_user); -extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern void exit_keys(struct task_struct *tsk); -extern int suid_keys(struct task_struct *tsk); +extern int install_thread_keyring_to_cred(struct cred *cred); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(keyring) \ -({ \ - struct key *old_session = current->cred->tgcred->session_keyring; \ - current->cred->tgcred->session_keyring = keyring; \ - old_session; \ -}) - #else /* CONFIG_KEYS */ #define key_validate(k) 0 @@ -303,11 +294,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(k) ({ NULL; }) -#define copy_keys(f,t) 0 -#define exit_keys(t) do { } while(0) -#define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2913252989b3..121d655e460d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,8 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred *cred; /* actual/objective task credentials */ + const struct cred *cred; /* actual/objective task credentials (COW) */ + struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char 
comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1720,7 +1721,6 @@ static inline struct user_struct *get_uid(struct user_struct *u) return u; } extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); extern void release_uids(struct user_namespace *ns); #include @@ -1870,6 +1870,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) +extern bool is_single_threaded(struct task_struct *); + /* * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. diff --git a/include/linux/security.h b/include/linux/security.h index 7e9fe046a0d1..68be11251447 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,24 +53,21 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -extern void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +extern int cap_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const 
void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); -extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); -extern void cap_task_reparent_to_init(struct task_struct *p); +extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); + unsigned long arg4, unsigned long arg5); extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); @@ -170,8 +167,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Compute and set the security attributes of a process being transformed * by an execve operation based on the old attributes (current->security) * and the information saved in @bprm->security by the set_security hook. - * Since this hook function (and its caller) are void, this hook can not - * return an error. However, it can leave the security attributes of the + * Since this function may return an error, in which case the process will + * be killed. However, it can leave the security attributes of the * process unchanged if an access failure occurs at this point. * bprm_apply_creds is called under task_lock. @unsafe indicates various * reasons why it may be unsafe to change security state. @@ -593,15 +590,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @cred_alloc_security: - * @cred contains the cred struct for child process. 
- * Allocate and attach a security structure to the cred->security field. - * The security field is initialized to NULL when the task structure is - * allocated. - * Return 0 if operation was successful. * @cred_free: * @cred points to the credentials. * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_commit: + * @new points to the new credentials. + * @old points to the original credentials. + * Install a new set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -614,15 +614,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @id2 contains a uid. * @flags contains one of the LSM_SETID_* values. * Return 0 if permission is granted. - * @task_post_setuid: + * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter * indicates which of the set*uid system calls invoked this hook. If - * @flags is LSM_SETID_FS, then @old_ruid is the old fs uid and the other - * parameters are not used. - * @old_ruid contains the old real uid (or fs uid if LSM_SETID_FS). - * @old_euid contains the old effective uid (or -1 if LSM_SETID_FS). - * @old_suid contains the old saved uid (or -1 if LSM_SETID_FS). + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaces * @flags contains one of the LSM_SETID_* values. * Return 0 on success. 
* @task_setgid: @@ -725,13 +723,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @arg3 contains a argument. * @arg4 contains a argument. * @arg5 contains a argument. - * @rc_p contains a pointer to communicate back the forced return code - * Return 0 if permission is granted, and non-zero if the security module - * has taken responsibility (setting *rc_p) for the prctl call. - * @task_reparent_to_init: - * Set the security attributes in @p->security for a kernel thread that - * is being reparented to the init task. - * @p contains the task_struct for the kernel thread. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value. * @task_to_inode: * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. @@ -1008,7 +1001,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * See whether a specific operational right is granted to a process on a * key. * @key_ref refers to the key (key pointer + possession attribute bit). - * @context points to the process to provide the context against which to + * @cred points to the credentials to provide the context against which to * evaluate the security data on the key. * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the @@ -1170,6 +1163,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @child process. * Security modules may also want to perform a process tracing check * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of * binprm_security_ops if the process is being traced and its security * attributes would be changed by the execve. * @child contains the task_struct structure for the target process. 
@@ -1193,19 +1187,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if the capability sets were successfully obtained. - * @capset_check: - * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the current process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if permission is granted. - * @capset_set: + * @capset: * Set the @effective, @inheritable, and @permitted capability sets for * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. * @capable: * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. 
@@ -1297,12 +1287,11 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - void (*capset_set) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); + int (*capset) (struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1314,7 +1303,7 @@ struct security_operations { int (*bprm_alloc_security) (struct linux_binprm *bprm); void (*bprm_free_security) (struct linux_binprm *bprm); - void (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); + int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); void (*bprm_post_apply_creds) (struct linux_binprm *bprm); int (*bprm_set_security) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); @@ -1405,11 +1394,13 @@ struct security_operations { int (*dentry_open) (struct file *file, const struct cred *cred); int (*task_create) (unsigned long clone_flags); - int (*cred_alloc_security) (struct cred *cred); void (*cred_free) (struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_commit)(struct cred *new, const struct cred *old); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); - int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , - uid_t old_euid, uid_t old_suid, int flags); + int (*task_fix_setuid) (struct cred *new, const struct cred *old, + int flags); int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); int (*task_getpgid) (struct 
task_struct *p); @@ -1429,8 +1420,7 @@ struct security_operations { int (*task_wait) (struct task_struct *p); int (*task_prctl) (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p); - void (*task_reparent_to_init) (struct task_struct *p); + unsigned long arg5); void (*task_to_inode) (struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); @@ -1535,10 +1525,10 @@ struct security_operations { /* key management security hooks */ #ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, struct task_struct *tsk, unsigned long flags); + int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); void (*key_free) (struct key *key); int (*key_permission) (key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ @@ -1564,12 +1554,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1583,7 +1571,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); 
+int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); void security_bprm_post_apply_creds(struct linux_binprm *bprm); int security_bprm_set(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -1660,11 +1648,12 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); -int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); +void security_commit_creds(struct cred *new, const struct cred *old); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags); +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags); int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); @@ -1683,8 +1672,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); -void security_task_reparent_to_init(struct task_struct *p); + unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); @@ -1759,18 +1747,13 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(const kernel_cap_t *effective, - const 
kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static inline int security_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return cap_capset_check(effective, inheritable, permitted); -} - -static inline void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - cap_capset_set(effective, inheritable, permitted); + return cap_capset(new, old, effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) @@ -1837,9 +1820,9 @@ static inline int security_bprm_alloc(struct linux_binprm *bprm) static inline void security_bprm_free(struct linux_binprm *bprm) { } -static inline void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_apply_creds(bprm, unsafe); } static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -2182,13 +2165,20 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_cred_alloc(struct cred *cred) +static inline void security_cred_free(struct cred *cred) +{ } + +static inline int security_prepare_creds(struct cred *new, + const struct cred *old, + gfp_t gfp) { return 0; } -static inline void security_cred_free(struct cred *cred) -{ } +static inline void security_commit_creds(struct cred *new, + const struct cred *old) +{ +} static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -2196,10 +2186,11 @@ static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, return 0; } -static inline int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +static inline int security_task_fix_setuid(struct cred *new, + const struct 
cred *old, + int flags) { - return cap_task_post_setuid(old_ruid, old_euid, old_suid, flags); + return cap_task_fix_setuid(new, old, flags); } static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, @@ -2286,14 +2277,9 @@ static inline int security_task_wait(struct task_struct *p) static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p) -{ - return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p); -} - -static inline void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg5) { - cap_task_reparent_to_init(p); + return cap_task_prctl(option, arg2, arg3, arg3, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -2719,16 +2705,16 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags); +int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm); + const struct cred *cred, key_perm_t perm); int security_key_getsecurity(struct key *key, char **_buffer); #else static inline int security_key_alloc(struct key *key, - struct task_struct *tsk, + const struct cred *cred, unsigned long flags) { return 0; @@ -2739,7 +2725,7 @@ static inline void security_key_free(struct key *key) } static inline int security_key_permission(key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm) { return 0; -- cgit v1.2.3 From a6f76f23d297f70e2a6b3ec607f7aeeea9e37e8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:24 +1100 Subject: CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write 
credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. 
(iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable in certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct cred. (iii) Unlocks the task's cred_exec_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears the bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), free_bprm() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_exec_mutex. No changes to the credentials will have been made. (2) LSM interface.
A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->cred should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. These functions may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (b) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open().
Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 16 -------- include/linux/binfmts.h | 16 +++++--- include/linux/cred.h | 3 +- include/linux/key.h | 2 - include/linux/security.h | 103 +++++++++++++++++------------------------------ 5 files changed, 48 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 0b2fcb698a63..e8ce2c4c7ac7 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -508,22 +508,6 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return 0; } -/* - * ieieeeeee, an audit function without a return code! - * - * This function might fail! I decided that it didn't matter. We are too late - * to fail the syscall and the information isn't REQUIRED for any purpose. It's - * just nice to have. We should be able to look at past audit logs to figure - * out this process's current cap set along with the fcaps from the PATH record - * and use that to come up with the final set. Yeah, its ugly, but all the info - * is still in the audit log. So I'm not going to bother mentioning we failed - * if we couldn't allocate memory. - * - * If someone changes their mind they could create the aux record earlier and - * then search here and use that earlier allocation. But I don't wanna. 
- * - * -Eric - */ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7394b5b349ff..6cbfbe297180 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -35,16 +35,20 @@ struct linux_binprm{ struct mm_struct *mm; unsigned long p; /* current top of mem */ unsigned int sh_bang:1, - misc_bang:1; + misc_bang:1, + cred_prepared:1,/* true if creds already prepared (multiple + * preps happen for interpreters) */ + cap_effective:1;/* true if has elevated effective capabilities, + * false if not; except for init which inherits + * its parent's caps anyway */ #ifdef __alpha__ unsigned int taso:1; #endif unsigned int recursion_depth; struct file * file; - int e_uid, e_gid; - kernel_cap_t cap_post_exec_permitted; - bool cap_effective; - void *security; + struct cred *cred; /* new credentials */ + int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ + unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; char * filename; /* Name of binary as seen by procps */ char * interp; /* Name of the binary really executed. 
Most @@ -101,7 +105,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); -extern void compute_creds(struct linux_binprm *binprm); +extern void install_exec_creds(struct linux_binprm *bprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/include/linux/cred.h b/include/linux/cred.h index eaf6fa695a04..8edb4d1d5427 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,8 +84,6 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; - -extern void release_tgcred(struct cred *cred); #endif /* @@ -144,6 +142,7 @@ struct cred { extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); extern struct cred *prepare_creds(void); +extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); diff --git a/include/linux/key.h b/include/linux/key.h index 69ecf0934b02..21d32a142c00 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,7 +278,6 @@ extern ctl_table key_sysctls[]; * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); -extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); @@ -294,7 +293,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) 
#define key_init() do { } while(0) diff --git a/include/linux/security.h b/include/linux/security.h index 68be11251447..56a0eed65673 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -57,8 +57,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_set_creds(struct linux_binprm *bprm); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -110,7 +109,7 @@ extern unsigned long mmap_min_addr; struct sched_param; struct request_sock; -/* bprm_apply_creds unsafe reasons */ +/* bprm->unsafe reasons */ #define LSM_UNSAFE_SHARE 1 #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 @@ -154,36 +153,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * * Security hooks for program execution operations. * - * @bprm_alloc_security: - * Allocate and attach a security structure to the @bprm->security field. - * The security field is initialized to NULL when the bprm structure is - * allocated. - * @bprm contains the linux_binprm structure to be modified. - * Return 0 if operation was successful. - * @bprm_free_security: - * @bprm contains the linux_binprm structure to be modified. - * Deallocate and clear the @bprm->security field. - * @bprm_apply_creds: - * Compute and set the security attributes of a process being transformed - * by an execve operation based on the old attributes (current->security) - * and the information saved in @bprm->security by the set_security hook. - * Since this function may return an error, in which case the process will - * be killed. 
However, it can leave the security attributes of the - * process unchanged if an access failure occurs at this point. - * bprm_apply_creds is called under task_lock. @unsafe indicates various - * reasons why it may be unsafe to change security state. - * @bprm contains the linux_binprm structure. - * @bprm_post_apply_creds: - * Runs after bprm_apply_creds with the task_lock dropped, so that - * functions which cannot be called safely under the task_lock can - * be used. This hook is a good place to perform state changes on - * the process such as closing open file descriptors to which access - * is no longer granted if the attributes were changed. - * Note that a security module might need to save state between - * bprm_apply_creds and bprm_post_apply_creds to store the decision - * on whether the process may proceed. - * @bprm contains the linux_binprm structure. - * @bprm_set_security: + * @bprm_set_creds: * Save security information in the bprm->security field, typically based * on information about the bprm->file, for later use by the apply_creds * hook. This hook may also optionally check permissions (e.g. for @@ -196,15 +166,30 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in - * the preceding set_security call. The primary difference from - * set_security is that the argv list and envp list are reliably - * available in @bprm. This hook may be called multiple times - * during a single execve; and in each pass set_security is called - * first. + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check the @bprm->security value which is set in the + * preceding set_creds call. 
The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). * @bprm_secureexec: * Return a boolean value (0 or 1) indicating whether a "secure exec" * is required. 
The flag is passed in the auxiliary table @@ -1301,13 +1286,11 @@ struct security_operations { int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); - int (*bprm_alloc_security) (struct linux_binprm *bprm); - void (*bprm_free_security) (struct linux_binprm *bprm); - int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); - void (*bprm_post_apply_creds) (struct linux_binprm *bprm); - int (*bprm_set_security) (struct linux_binprm *bprm); + int (*bprm_set_creds) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); int (*bprm_secureexec) (struct linux_binprm *bprm); + void (*bprm_committing_creds) (struct linux_binprm *bprm); + void (*bprm_committed_creds) (struct linux_binprm *bprm); int (*sb_alloc_security) (struct super_block *sb); void (*sb_free_security) (struct super_block *sb); @@ -1569,12 +1552,10 @@ int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); -int security_bprm_alloc(struct linux_binprm *bprm); -void security_bprm_free(struct linux_binprm *bprm); -int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); -void security_bprm_post_apply_creds(struct linux_binprm *bprm); -int security_bprm_set(struct linux_binprm *bprm); +int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); +void security_bprm_committing_creds(struct linux_binprm *bprm); +void security_bprm_committed_creds(struct linux_binprm *bprm); int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); @@ -1812,32 +1793,22 @@ static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) return cap_vm_enough_memory(mm, pages); } -static inline int 
security_bprm_alloc(struct linux_binprm *bprm) -{ - return 0; -} - -static inline void security_bprm_free(struct linux_binprm *bprm) -{ } - -static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_set_creds(struct linux_binprm *bprm) { - return cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_set_creds(bprm); } -static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) +static inline int security_bprm_check(struct linux_binprm *bprm) { - return; + return 0; } -static inline int security_bprm_set(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(struct linux_binprm *bprm) { - return cap_bprm_set_security(bprm); } -static inline int security_bprm_check(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(struct linux_binprm *bprm) { - return 0; } static inline int security_bprm_secureexec(struct linux_binprm *bprm) -- cgit v1.2.3 From d76b0d9b2d87cfc95686e148767cbf7d0e22bdc0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:25 +1100 Subject: CRED: Use creds in file structs Attach creds to file structs and discard f_uid/f_gid. file_operations::open() methods (such as hppfs_open()) should use file->f_cred rather than current_cred(). At the moment file->f_cred will be current_cred() at this point. 
Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3bfec1327b8d..c0fb6d81d89b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -827,7 +827,7 @@ struct file { fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; - unsigned int f_uid, f_gid; + const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; -- cgit v1.2.3 From 98870ab0a5a3f1822aee681d2997017e1c87d026 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Documentation Document credentials and the new credentials API. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 8edb4d1d5427..794aab5c66e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -1,4 +1,4 @@ -/* Credentials management +/* Credentials management - see Documentation/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -169,6 +169,12 @@ static inline struct cred *get_new_cred(struct cred *cred) * * Get a reference on the specified set of credentials. The caller must * release the reference. + * + * This is used to deal with a committed set of credentials. Although the + * pointer is const, this will temporarily discard the const and increment the + * usage count. The purpose of this is to attempt to catch at compile time the + * accidental alteration of a set of credentials that should be considered + * immutable. 
*/ static inline const struct cred *get_cred(const struct cred *cred) { @@ -181,6 +187,10 @@ static inline const struct cred *get_cred(const struct cred *cred) * * Release a reference to a set of credentials, deleting them when the last ref * is released. + * + * This takes a const pointer to a set of credentials because the credentials + * on task_struct are attached by const pointers to prevent accidental + * alteration of otherwise immutable credential sets. */ static inline void put_cred(const struct cred *_cred) { -- cgit v1.2.3 From 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Differentiate objective and effective subjective credentials on a task Differentiate the objective and real subjective credentials from the effective subjective credentials on a task by introducing a second credentials pointer into the task_struct. task_struct::real_cred then refers to the objective and apparent real subjective credentials of a task, as perceived by the other tasks in the system. task_struct::cred then refers to the effective subjective credentials of a task, as used by that task when it's actually running. These are not visible to the other tasks in the system. __task_cred(task) then refers to the objective/real credentials of the task in question. current_cred() refers to the effective subjective credentials of the current task. prepare_creds() uses the objective creds as a base and commit_creds() changes both pointers in the task_struct (indeed commit_creds() requires them to be the same). override_creds() and revert_creds() change the subjective creds pointer only, and the former returns the old subjective creds. These are used by NFSD, faccessat() and do_coredump(), and will be used by CacheFiles. In SELinux, current_has_perm() is provided as an alternative to task_has_perm().
This uses the effective subjective context of current, whereas task_has_perm() uses the objective/real context of the subject. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 29 +++++++++++++++-------------- include/linux/init_task.h | 1 + include/linux/sched.h | 5 ++++- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 794aab5c66e5..55a9c995d694 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,8 +146,8 @@ extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *) __deprecated; -extern void revert_creds(const struct cred *) __deprecated; +extern const struct cred *override_creds(const struct cred *); +extern void revert_creds(const struct cred *); extern void __init cred_init(void); /** @@ -202,32 +202,32 @@ static inline void put_cred(const struct cred *_cred) } /** - * current_cred - Access the current task's credentials + * current_cred - Access the current task's subjective credentials * - * Access the credentials of the current task. + * Access the subjective credentials of the current task. */ #define current_cred() \ (current->cred) /** - * __task_cred - Access another task's credentials + * __task_cred - Access a task's objective credentials * @task: The task to query * - * Access the credentials of another task. The caller must hold the - * RCU readlock. + * Access the objective credentials of a task. The caller must hold the RCU + * readlock. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. 
*/ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference((task)->cred))) + ((const struct cred *)(rcu_dereference((task)->real_cred))) /** - * get_task_cred - Get another task's credentials + * get_task_cred - Get another task's objective credentials * @task: The task to query * - * Get the credentials of a task, pinning them so that they can't go away. - * Accessing a task's credentials directly is not permitted. + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. @@ -243,10 +243,11 @@ static inline void put_cred(const struct cred *_cred) }) /** - * get_current_cred - Get the current task's credentials + * get_current_cred - Get the current task's subjective credentials * - * Get the credentials of the current task, pinning them so that they can't go - * away. Accessing the current task's credentials directly is not permitted. + * Get the subjective credentials of the current task, pinning them so that + * they can't go away. Accessing the current task's credentials directly is + * not permitted. 
*/ #define get_current_cred() \ (get_cred(current_cred())) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08c3b24ad9a8..2597858035cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ + .real_cred = &init_cred, \ .cred = &init_cred, \ .cred_exec_mutex = \ __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 121d655e460d..3443123b0709 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,10 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *cred; /* actual/objective task credentials (COW) */ + const struct cred *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred *cred; /* effective (overridable) subjective task + * credentials (COW) */ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path -- cgit v1.2.3 From 3a3b7ce9336952ea7b9564d976d068a238976c9d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:28 +1100 Subject: CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. 
This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. 
Signed-off-by: Casey Schaufler [Smack changes] Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 6 ++++++ include/linux/security.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 55a9c995d694..26c1ab179946 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ struct user_struct; struct cred; +struct inode; /* * COW Supplementary groups list @@ -148,6 +149,11 @@ extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); +extern struct cred *prepare_kernel_cred(struct task_struct *); +extern int change_create_files_as(struct cred *, struct inode *); +extern int set_security_override(struct cred *, u32); +extern int set_security_override_from_ctx(struct cred *, const char *); +extern int set_create_files_as(struct cred *, struct inode *); extern void __init cred_init(void); /** diff --git a/include/linux/security.h b/include/linux/security.h index 56a0eed65673..59a11e19b617 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -587,6 +587,19 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new points to the new credentials. * @old points to the original credentials. * Install a new set of credentials. + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. 
+ * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1381,6 +1394,8 @@ struct security_operations { int (*cred_prepare)(struct cred *new, const struct cred *old, gfp_t gfp); void (*cred_commit)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1632,6 +1647,8 @@ int security_task_create(unsigned long clone_flags); void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_commit_creds(struct cred *new, const struct cred *old); +int security_kernel_act_as(struct cred *new, u32 secid); +int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2151,6 +2168,17 @@ static inline void security_commit_creds(struct cred *new, { } +static inline int security_kernel_act_as(struct cred *cred, u32 secid) +{ + return 0; +} + +static inline int security_kernel_create_files_as(struct cred *cred, + struct inode *inode) +{ + return 0; +} + static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { -- cgit v1.2.3 From 31e889098a80ceb3e9e3c555d522b2686a6663c6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: ftrace: pass module struct to arch dynamic ftrace functions Impact: allow archs more flexibility on dynamic ftrace implementations Dynamic ftrace has largely been
developed on x86. Since x86 does not have the same limitations as other architectures, the ftrace interaction between the generic code and the architecture specific code was not flexible enough to handle some of the issues that other architectures have. Most notably, module trampolines. Due to the limited branch distance that archs make in calling kernel core code from modules, the module load code must create a trampoline to jump to what will make the larger jump into core kernel code. The problem arises when this happens to a call to mcount. Ftrace checks all code before modifying it and makes sure the current code is what it expects. Right now, there is not enough information to handle modifying module trampolines. This patch changes the API between generic dynamic ftrace code and the arch dependent code. There are now two functions for modifying code: ftrace_make_nop(mod, rec, addr) - convert the code at rec->ip into a nop, where the original text is calling addr. (mod is the module struct if called by module init) ftrace_make_call(rec, addr) - convert the code at rec->ip that should be a nop into a caller to addr. The record "rec" now has a new field called "arch" where the architecture can add any special attributes to each call site record.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 53 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4fbc4a8b86a5..166a2070ef65 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE +/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +#include + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -88,6 +91,7 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -95,22 +99,40 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern unsigned char *ftrace_nop_replace(void); -extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +/** + * ftrace_make_nop - convert code into top + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. 
+ * + * The code segment at @rec->ip should be a caller to @addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr); /** - * ftrace_modify_code - modify code segment - * @ip: the address of the code segment - * @old_code: the contents of what is expected to be there - * @new_code: the code to patch in + * ftrace_make_call - convert a nop call site into a call to addr + * @rec: the mcount call site record + * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -118,6 +140,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * The code segment at @rec->ip should be a nop + * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -125,8 +149,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * -EPERM on error writing to the location * Any other value will be considered a failure. 
*/ -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); +extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + + +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); @@ -259,11 +286,13 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(unsigned long *start, unsigned long *end) { } +ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } #endif -- cgit v1.2.3 From e7d3737ea1b102030f44e96c97754101e41515f0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 06:02:06 +0100 Subject: tracing/function-return-tracer: support for dynamic ftrace on function return tracer This patch adds the support for dynamic tracing on the function return tracer. The whole difference with normal dynamic function tracing is that we don't need to hook on a particular callback. The only pro that we want is to nop or set dynamically the calls to ftrace_caller (which is ftrace_return_caller here). Some security checks ensure that we are not trying to launch dynamic tracing for return tracing while normal function tracing is already running. 
An example of trace with getnstimeofday set as a filter: ktime_get_ts+0x22/0x50 -> getnstimeofday (2283 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1396 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1825 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1426 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1524 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1434 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1502 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1404 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1397 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1051 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1314 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1344 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1163 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1390 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1374 ns) Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 166a2070ef65..f1af1aab00e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -25,6 +25,17 @@ struct ftrace_ops { extern int function_trace_stop; +/* + * Type of the current tracing. + */ +enum ftrace_tracing_type_t { + FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */ + FTRACE_TYPE_RETURN, /* Hook the return of the function */ +}; + +/* Current tracing type, default is FTRACE_TYPE_ENTER */ +extern enum ftrace_tracing_type_t ftrace_tracing_type; + /** * ftrace_stop - stop function tracer. 
* @@ -104,6 +115,9 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#ifdef CONFIG_FUNCTION_RET_TRACER +extern void ftrace_return_caller(void); +#endif /** * ftrace_make_nop - convert code into top @@ -310,7 +324,7 @@ struct ftrace_retfunc { /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); -extern void register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); -- cgit v1.2.3 From 954e100d2275cb2f150f2b18d5cddcdf67b956ac Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:34 -0500 Subject: rcu: add rcu_read_*_sched_notrace() Impact: new API, useful for tracepoints and markers. Add _notrace version to rcu_read_*_sched(). Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e33..895dc9c1088c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -142,6 +142,7 @@ struct rcu_head { * on the write-side to insure proper synchronization. */ #define rcu_read_lock_sched() preempt_disable() +#define rcu_read_lock_sched_notrace() preempt_disable_notrace() /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section @@ -149,6 +150,7 @@ struct rcu_head { * See rcu_read_lock_sched for more information. 
*/ #define rcu_read_unlock_sched() preempt_enable() +#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() -- cgit v1.2.3 From e3f8c4b9117d70127a8cab480af83bbfd048a28b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Nov 2008 17:47:36 -0500 Subject: markers: add missing stdargs.h include, needed due to va_list usage Impact: build fix (for future changes) That seemed to cause a build issue when marker.h is included early, even though stdarg.h is included in kernel.h. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 4cf45472d9f5..05ec0df37089 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -12,6 +12,7 @@ * See the file COPYING for more details. */ +#include <stdarg.h> #include <linux/types.h> struct module; -- cgit v1.2.3 From c1df1bd2c4d4b20c83755a0f41956b57aec4842a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:39 -0500 Subject: markers: auto enable tracepoints (new API : trace_mark_tp()) Impact: new API Add a new API trace_mark_tp(), which declares a marker within a tracepoint probe. When the marker is activated, the tracepoint is automatically enabled. No branch test is used at the marker site, because it would be a duplicate of the branch already present in the tracepoint. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 05ec0df37089..57a307018ceb 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -49,6 +49,8 @@ struct marker { void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; + const char *tp_name; /* Optional tracepoint name */ + void *tp_cb; /* Optional tracepoint callback */ } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS @@ -73,7 +75,7 @@ struct marker { __attribute__((section("__markers"), aligned(8))) = \ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL }; \ + { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -81,11 +83,38 @@ struct marker { } \ } while (0) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, \ + { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + __mark_check_format(format, ## args); \ + (*__mark_##name.call)(&__mark_##name, call_private, \ + ## args); \ + } while (0) + extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ #define __trace_mark(generic, name, call_private, format, args...) 
\ __mark_check_format(format, ## args) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + __mark_check_format(format, ## args); \ + } while (0) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } @@ -117,6 +146,20 @@ static inline void marker_update_probe_range(struct marker *begin, #define _trace_mark(name, format, args...) \ __trace_mark(1, name, NULL, format, ## args) +/** + * trace_mark_tp - Marker in a tracepoint callback + * @name: marker name, not quoted. + * @tp_name: tracepoint name, not quoted. + * @tp_cb: tracepoint callback. Should have an associated global symbol so it + * is not optimized away by the compiler (should not be static). + * @format: format string + * @args...: variable argument list + * + * Places a marker in a tracepoint callback. + */ +#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ + __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) + /** * MARK_NOARGS - Format string for a marker with no argument. */ -- cgit v1.2.3 From a0bca6a59ebc052751eed6e3b182c153495672d8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:40 -0500 Subject: markers: create DEFINE_MARKER and GET_MARKER (new API) Impact: new API. Allow markers to be used only for declaration, without function call associated. Useful to create specialized probes. The problem we had is that two function calls were required when one wanted to put a marker in a tracepoint probe. Now the marker can be used simply for trace data type declaration, leaving the trace write work within the tracepoint probe without any additional function call. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 57a307018ceb..34c14bc957f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -55,6 +55,22 @@ struct marker { #ifdef CONFIG_MARKERS +#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ + NULL, tp_name_str, tp_cb } + +#define DEFINE_MARKER(name, format) \ + _DEFINE_MARKER(name, NULL, NULL, format) + +#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ + _DEFINE_MARKER(name, #tp_name, tp_cb, format) + /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -68,14 +84,7 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) 
\ do { \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ + DEFINE_MARKER(name, format); \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -89,14 +98,7 @@ struct marker { { \ register_trace_##tp_name(tp_cb); \ } \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ __mark_check_format(format, ## args); \ (*__mark_##name.call)(&__mark_##name, call_private, \ ## args); \ @@ -104,7 +106,11 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); + +#define GET_MARKER(name) (__mark_##name) + #else /* !CONFIG_MARKERS */ +#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) 
\ @@ -118,6 +124,7 @@ extern void marker_update_probe_range(struct marker *begin, static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } +#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** -- cgit v1.2.3 From da7b3eab167091693ad215ad7692f7d0d24d1356 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:43 -0500 Subject: tracepoints: use rcu_*_sched_notrace Make sure tracepoints can be called within ftrace callbacks. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63064e9403f2..69648c54a326 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -40,14 +40,14 @@ struct tracepoint { do { \ void **it_func; \ \ - rcu_read_lock_sched(); \ + rcu_read_lock_sched_notrace(); \ it_func = rcu_dereference((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ } while (*(++it_func)); \ } \ - rcu_read_unlock_sched(); \ + rcu_read_unlock_sched_notrace(); \ } while (0) /* -- cgit v1.2.3 From c420970ef476d7d68df119711700666224001f43 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:44 -0500 Subject: tracepoints: use unregister return value Impact: bugfix. Unregistering a tracepoint can fail. Return the error value. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 69648c54a326..c60a791f8874 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -73,9 +73,9 @@ struct tracepoint { return tracepoint_probe_register(#name ":" #proto, \ (void *)probe); \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ + static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - tracepoint_probe_unregister(#name ":" #proto, \ + return tracepoint_probe_unregister(#name ":" #proto, \ (void *)probe); \ } @@ -92,8 +92,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, { \ return -ENOSYS; \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ - { } + static inline int unregister_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) -- cgit v1.2.3 From 5f382671def7cb9c0f4b75d586dc5f60dca5e1c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:45 -0500 Subject: tracepoints: do not put arguments in name Impact: cleanup That's overkill, takes space. We have a global tracepoint registry in header files anyway. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c60a791f8874..7e9b42aeae0e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,7 +60,7 @@ struct tracepoint { { \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) \ - = #name ":" #proto; \ + = #name; \ static struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(8))) = \ { __tpstrtab_##name, 0, NULL }; \ @@ -70,13 +70,11 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } extern void tracepoint_update_probe_range(struct tracepoint *begin, -- cgit v1.2.3 From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e9b42aeae0e..757005458366 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -55,15 +59,10 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ @@ -77,11 +76,23 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define 
EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } -- cgit v1.2.3 From f004f3ea34209d8b836426b26ade3dc502631b18 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Fri, 14 Nov 2008 00:24:34 +0000 Subject: phylib: make mdio-gpio work without OF (v4) make mdio-gpio work with non OpenFirmware gpio implementation. Additional changes to mdio-gpio: - use gpio_request() and gpio_free() - place irq[] array in struct mdio_gpio_info - add module description, author and license - add note about compiling this driver as module - rename mdc and mdio function (were ugly names) - change MII to MDIO in bus name - add __init __exit to module (un)loading functions - probe fails if no phys added to the bus - kzalloc bitbang with sizeof(*bitbang) Changes since v3: - keep bus naming "%x" to be compatible with existing drivers. Changes since v2: - more #ifdefs reduction - platform driver will be registered on OF platforms also - unified platform and OF bus_id to phy%i Changes since v1: - removed NO_IRQ - reduced #ifdefs Laurent, please test this driver under OF. Signed-off-by: Paulius Zaleckas Signed-off-by: David S. 
Miller --- include/linux/mdio-gpio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/mdio-gpio.h (limited to 'include/linux') diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h new file mode 100644 index 000000000000..e9d3fdfe41d7 --- /dev/null +++ b/include/linux/mdio-gpio.h @@ -0,0 +1,25 @@ +/* + * MDIO-GPIO bus platform data structures + * + * Copyright (C) 2008, Paulius Zaleckas + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#include <linux/phy.h> + +struct mdio_gpio_platform_data { + /* GPIO numbers for bus pins */ + unsigned int mdc; + unsigned int mdio; + + unsigned int phy_mask; + int irqs[PHY_MAX_ADDR]; +}; + +#endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.2.3 From e8b2dfe9b4501ed0047459b2756ba26e5a940a69 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 16 Nov 2008 19:32:39 -0800 Subject: TPROXY: implemented IP_RECVORIGDSTADDR socket option In case UDP traffic is redirected to a local UDP socket, the originally addressed destination address/port cannot be recovered with the in-kernel tproxy. This patch adds an IP_RECVORIGDSTADDR sockopt that enables an IP_ORIGDSTADDR ancillary message in recvmsg(). This ancillary message contains the original destination address/port of the packet being received. Signed-off-by: Balazs Scheidler Signed-off-by: David S. 
Miller --- include/linux/in.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index db458beef19d..d60122a3a088 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -80,6 +80,10 @@ struct in_addr { /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ -- cgit v1.2.3 From bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:37:55 -0800 Subject: rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. 
Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet Acked-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 94 ++++++++++++++++++++++++++++++++++++ include/linux/rculist_nulls.h | 110 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 include/linux/list_nulls.h create mode 100644 include/linux/rculist_nulls.h (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h new file mode 100644 index 000000000000..93150ecf3ea4 --- /dev/null +++ b/include/linux/list_nulls.h @@ -0,0 +1,94 @@ +#ifndef _LINUX_LIST_NULLS_H +#define _LINUX_LIST_NULLS_H + +/* + * Special version of lists, where end of list is not a NULL pointer, + * but a 'nulls' marker, which can have many different values. + * (up to 2^31 different values guaranteed on all platforms) + * + * In the standard hlist, termination of a list is the NULL pointer. 
+ * In this special 'nulls' variant, we use the fact that objects stored in + * a list are aligned on a word (4 or 8 bytes alignment). + * We therefore use the last significant bit of 'ptr' : + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) + * Set to 0 : This is a pointer to some object (ptr) + */ + +struct hlist_nulls_head { + struct hlist_nulls_node *first; +}; + +struct hlist_nulls_node { + struct hlist_nulls_node *next, **pprev; +}; +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ + ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) + +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) +/** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested + * + */ +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr & 1); +} + +/** + * get_nulls_value - Get the 'nulls' value of the end of chain + * @ptr: end of chain + * + * Should be called only if is_a_nulls(ptr); + */ +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr) >> 1; +} + +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) +{ + return !h->pprev; +} + +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) +{ + return is_a_nulls(h->first); +} + +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +{ + struct hlist_nulls_node *next = n->next; + struct hlist_nulls_node **pprev = n->pprev; + *pprev = next; + if (!is_a_nulls(next)) + next->pprev = pprev; +} + +/** + * hlist_nulls_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. 
+ * + */ +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ + for (; (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h new file mode 100644 index 000000000000..f9ddd03961a8 --- /dev/null +++ b/include/linux/rculist_nulls.h @@ -0,0 +1,110 @@ +#ifndef _LINUX_RCULIST_NULLS_H +#define _LINUX_RCULIST_NULLS_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include <linux/list_nulls.h> +#include <linux/rcupdate.h> + +/** + * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_nulls_add_head_rcu() or + * hlist_nulls_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_nulls_for_each_entry_rcu(). 
+ */ +static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) +{ + if (!hlist_nulls_unhashed(n)) { + __hlist_nulls_del(n); + n->pprev = NULL; + } +} + +/** + * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry(). + */ +static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) +{ + __hlist_nulls_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_nulls_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). 
+ */ +static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(h->first, n); + if (!is_a_nulls(first)) + first->pprev = &n->next; +} +/** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference((head)->first); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(pos->next)) + +#endif +#endif -- cgit v1.2.3 From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:39:21 -0800 Subject: udp: Use hlist_nulls in UDP RCU code This is a straightforward patch, using hlist_nulls infrastructure. RCUification already done on UDP two weeks ago. Using hlist_nulls permits us to avoid some memory barriers, both at lookup time and delete time. Patch is large because it adds new macros to include/net/sock.h. These macros will be used by TCP & DCCP in next patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/rculist.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3ba2998b22ba..e649bd3f2c97 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,22 +383,5 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) -/** - * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * @next: the &struct hlist_node to use as a next cursor - * - * Special version of hlist_for_each_entry_rcu that make sure - * each next pointer is fetched before each iteration. - */ -#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(next)) - #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 3f2c31d90327f21d76d296af34aa4ca547932ff4 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:41:34 -0800 Subject: virtio_net: VIRTIO_NET_F_MSG_RXBUF (imprive rcv buffer allocation) If segmentation offload is enabled by the host, we currently allocate maximum sized packet buffers and pass them to the host. This uses up 20 ring entries, allowing us to supply only 20 packet buffers to the host with a 256 entry ring. This is a huge overhead when receiving small packets, and is most keenly felt when receiving MTU sized packets from off-host. The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support using receive buffers which are smaller than the maximum packet size. 
In order to transfer large packets to the guest, the host merges together multiple receive buffers to form a larger logical buffer. The number of merged buffers is returned to the guest via a field in the virtio_net_hdr. Make use of this support by supplying single page receive buffers to the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of the payload to the skb's linear data buffer and adjust the fragment offset to point to the remaining data. This ensures proper alignment and allows us to not use any paged data for small packets. If the payload occupies multiple pages, we simply append those pages as fragments and free the associated skbs. This scheme allows us to be efficient in our use of ring entries while still supporting large packets. Benchmarking using netperf from an external machine to a guest over a 10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark with GSO disabled on the host side, throughput was seen to increase from 700Mb/s to 1.7Gb/s. Based on a patch from Herbert Xu. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell (use netdev_priv) Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5e33761b9b8a..5cdd0aa8bde9 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -20,6 +20,7 @@ #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. 
*/ struct virtio_net_config { @@ -44,4 +45,12 @@ struct virtio_net_hdr __u16 csum_start; /* Position to start checksumming from */ __u16 csum_offset; /* Offset after that to place checksum */ }; + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + __u16 num_buffers; /* Number of merged rx buffers */ +}; + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 49aebc66d6b896f9c7c5739d85c4548c00015aa7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:51:23 -0800 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bde60b4..6eaaca9b037a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 -- cgit v1.2.3 From 29450559849da7066813601effb7666966869853 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:53:48 -0800 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9b037a..5a5a89935dbc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From dd9c0e363cef32b7d6f23d4c87e8dfe4f91fd1c5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:55:08 -0800 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a89935dbc..eda389ce04f4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From e17be2b2a95b43fe0d5878adf330701bb7a77115 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 17 Nov 2008 15:24:14 +0000 Subject: uwb: add pal parameter to new reservation callback The pal parameter allows PALs to retrieve their PAL-specific data structure. 
Signed-off-by: David Vrabel --- include/linux/uwb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index c4854848999d..effd97998fd1 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -405,7 +405,7 @@ struct uwb_pal { struct list_head node; const char *name; struct device *device; - void (*new_rsv)(struct uwb_rsv *rsv); + void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv); }; void uwb_pal_init(struct uwb_pal *pal); -- cgit v1.2.3 From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 03:22:41 +0100 Subject: tracing/function-return-tracer: add the overrun field Impact: help to find the better depth of trace We decided to arbitrarily define the depth of function return trace as "20". Perhaps this is not enough. To help find an optimal depth, we measure now the overrun: the number of functions that have been missed for the current thread. By default this is not displayed, we have to set a particular flag on the return tracer: echo overrun > /debug/tracing/trace_options And the overrun will be printed on the right. As the trace shows below, the current 20 depth is not enough.
update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838) update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838) do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838) tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838) tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838) vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274) vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274) set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274) con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274) release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274) release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274) con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274) con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274) n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274) irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274) ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274) ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274) ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274) hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ include/linux/sched.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f1af1aab00e6..f7ba4ea5e128 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -318,6 +318,8 @@ struct ftrace_retfunc { unsigned long func; /* Current function */ unsigned long long calltime; unsigned 
long long rettime; + /* Number of functions that overran the depth limit for current task */ + unsigned long overrun; }; #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/include/linux/sched.h b/include/linux/sched.h index 61c8cc36028a..c8e0db464206 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2016,6 +2016,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * used. */ task_thread_info(p)->curr_ret_stack = -1; + atomic_set(&task_thread_info(p)->trace_overrun, 0); #endif } -- cgit v1.2.3 From 1e291b14c8f1101b9093434489bd4dc0e03f3d0f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 12 Nov 2008 18:54:42 +0000 Subject: of: Add helpers for finding device nodes which have a given property This commit adds a routine for finding a device node which has a certain property. The contents of the property are not taken into account, merely the presence or absence of the property. Based on that routine, we add a for_each_ macro for iterating over all nodes that have a certain property. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index e2488f5e7cb2..6a7efa242f5e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -57,6 +57,12 @@ extern struct device_node *of_get_next_child(const struct device_node *node, for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) +extern struct device_node *of_find_node_with_property( + struct device_node *from, const char *prop_name); +#define for_each_node_with_property(dn, prop_name) \ + for (dn = of_find_node_with_property(NULL, prop_name); dn; \ + dn = of_find_node_with_property(dn, prop_name)) + extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); -- cgit v1.2.3 From 6fae35f9cea92793a98b2d9ab21235e5ae035581 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 17 Nov 2008 15:53:42 +0000 Subject: uwb: add basic radio manager The UWB radio manager coordinates the use of the radio between the PALs that may be using it. PALs request use of the radio with uwb_radio_start() and the radio manager will start beaconing if it's not already doing so. When the last PAL has called uwb_radio_stop() beaconing will be stopped. In the future, the radio manager will have a more sophisticated channel selection algorithm, probably following the Channel Selection Policy from the WiMedia Alliance when it is finalized. For now, channel 9 (BG1, TFC1) is selected. The user may override the channel selected by the radio manager and may force the radio to stop beaconing. The WUSB Host Controller PAL makes use of this and there are two new debug PAL commands that can be used for testing.
Signed-off-by: David Vrabel --- include/linux/uwb.h | 23 +++++++++++++++++++---- include/linux/uwb/debug-cmd.h | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index effd97998fd1..7d3ebf046f9a 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -355,6 +355,7 @@ struct uwb_rc { u8 ctx_roll; int beaconing; /* Beaconing state [channel number] */ + int beaconing_forced; int scanning; enum uwb_scan_type scan_type:3; unsigned ready:1; @@ -373,8 +374,8 @@ struct uwb_rc { struct uwb_rc_cmd_set_ie *ies; size_t ies_capacity; - spinlock_t pal_lock; struct list_head pals; + int active_pals; struct uwb_dbg *dbg; }; @@ -382,11 +383,17 @@ struct uwb_rc { /** * struct uwb_pal - a UWB PAL - * @name: descriptive name for this PAL (wushc, wlp, etc.). + * @name: descriptive name for this PAL (wusbhc, wlp, etc.). * @device: a device for the PAL. Used to link the PAL and the radio * controller in sysfs. + * @rc: the radio controller the PAL uses. + * @channel_changed: called when the channel used by the radio changes. + * A channel of -1 means the channel has been stopped. * @new_rsv: called when a peer requests a reservation (may be NULL if * the PAL cannot accept reservation requests). + * @channel: channel being used by the PAL; 0 if the PAL isn't using + * the radio; -1 if the PAL wishes to use the radio but + * cannot. * * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). 
@@ -405,12 +412,20 @@ struct uwb_pal { struct list_head node; const char *name; struct device *device; + struct uwb_rc *rc; + + void (*channel_changed)(struct uwb_pal *pal, int channel); void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv); + + int channel; }; void uwb_pal_init(struct uwb_pal *pal); -int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal); -void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal); +int uwb_pal_register(struct uwb_pal *pal); +void uwb_pal_unregister(struct uwb_pal *pal); + +int uwb_radio_start(struct uwb_pal *pal); +void uwb_radio_stop(struct uwb_pal *pal); /* * General public API diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h index 6a16566f0221..07efbe17db53 100644 --- a/include/linux/uwb/debug-cmd.h +++ b/include/linux/uwb/debug-cmd.h @@ -34,6 +34,8 @@ enum uwb_dbg_cmd_type { UWB_DBG_CMD_RSV_TERMINATE = 2, UWB_DBG_CMD_IE_ADD = 3, UWB_DBG_CMD_IE_RM = 4, + UWB_DBG_CMD_RADIO_START = 5, + UWB_DBG_CMD_RADIO_STOP = 6, }; struct uwb_dbg_cmd_rsv_establish { -- cgit v1.2.3 From e8e1594c8126b1b773988fa2e3bfec76cff88336 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 17 Nov 2008 16:16:51 +0000 Subject: wlp: start/stop radio on network interface up/down Signed-off-by: David Vrabel --- include/linux/wlp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wlp.h b/include/linux/wlp.h index 033545e145c7..ac95ce6606ac 100644 --- a/include/linux/wlp.h +++ b/include/linux/wlp.h @@ -646,6 +646,7 @@ struct wlp_wss { struct wlp { struct mutex mutex; struct uwb_rc *rc; /* UWB radio controller */ + struct net_device *ndev; struct uwb_pal pal; struct wlp_eda eda; struct wlp_uuid uuid; @@ -675,7 +676,7 @@ struct wlp_wss_attribute { static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode, \ _show, _store) -extern int wlp_setup(struct wlp *, struct uwb_rc *); +extern int wlp_setup(struct wlp *, struct uwb_rc *, struct net_device 
*ndev); extern void wlp_remove(struct wlp *); extern ssize_t wlp_neighborhood_show(struct wlp *, char *); extern int wlp_wss_setup(struct net_device *, struct wlp_wss *); -- cgit v1.2.3 From 0996e6382482ce9014787693d3884e9468153a5c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 17 Nov 2008 16:23:22 +0000 Subject: uwb: remove unused beacon group join/leave events The UWB_NOTIF_BG_JOIN/UWB_NOTIF_BG_LEAVE events have been superseded by the channel_changed callback in struct uwb_pal. Signed-off-by: David Vrabel --- include/linux/uwb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 7d3ebf046f9a..1719709d60ca 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -479,7 +479,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name, struct uwb_rccb *cmd, size_t cmd_size, u8 expected_type, u16 expected_event, struct uwb_rceb **preply); -int uwb_bg_joined(struct uwb_rc *rc); size_t __uwb_addr_print(char *, size_t, const unsigned char *, int); @@ -568,7 +567,9 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) } /** - * Events generated by UWB that can be passed to any listeners + * enum uwb_notifs - UWB events that can be passed to any listeners + * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group. + * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group. * * Higher layers can register callback functions with the radio * controller using uwb_notifs_register(). The radio controller @@ -576,8 +577,6 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) * nodes when an event occurs.
*/ enum uwb_notifs { - UWB_NOTIF_BG_JOIN = 0, /* radio controller joined a beacon group */ - UWB_NOTIF_BG_LEAVE = 1, /* radio controller left a beacon group */ UWB_NOTIF_ONAIR, UWB_NOTIF_OFFAIR, }; -- cgit v1.2.3 From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:32:24 -0800 Subject: netdev: network device operations infrastructure This patch changes the network device internal API to move administrative operations out of the network device structure and into a separate structure. This patch involves some hackery to maintain compatibility between the new and old model, so all 300+ drivers don't have to be changed at once. For drivers that aren't converted yet, the netdevice_ops virt function list still resides in the net_device structure. For old protocols, the new net_device_ops are copied out to the old net_device pointers. After the transition is completed the nag message can be changed to a WARN_ON, and the compatibility code can be made configurable. Some function pointers aren't moved: * destructor can't be in net_device_ops because it may need to be referenced after the module is unloaded. * neighbor setup is manipulated in a couple of places that need special consideration * hard_start_xmit is in the fast path for transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++------------- 1 file changed, 168 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..9060f5f3517a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,131 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; } ____cacheline_aligned_in_smp; + +/* + * This structure defines the management hooks for network devices. + * The following hooks can bed defined and are optonal (can be null) + * unless otherwise noted.
+ * + * int (*ndo_init)(struct net_device *dev); + * This function is called once when network device is registered. + * The network device can use this to any late stage initializaton + * or semantic validattion. It can fail with an error code which will + * be propogated back to register_netdev + * + * void (*ndo_uninit)(struct net_device *dev); + * This function is called when device is unregistered or when registration + * fails. It is not called if init fails. + * + * int (*ndo_open)(struct net_device *dev); + * This function is called when network device transistions to the up + * state. + * + * int (*ndo_stop)(struct net_device *dev); + * This function is called when network device transistions to the down + * state. + * + * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); + * This function is called to allow device receiver to make + * changes to configuration when multicast or promiscious is enabled. + * + * void (*ndo_set_rx_mode)(struct net_device *dev); + * This function is called device changes address list filtering. + * + * void (*ndo_set_multicast_list)(struct net_device *dev); + * This function is called when the multicast address list changes. + * + * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); + * This function is called when the Media Access Control address + * needs to be changed. If not this interface is not defined, the + * mac address can not be changed. + * + * int (*ndo_validate_addr)(struct net_device *dev); + * Test if Media Access Control address is valid for the device. + * + * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called when a user request an ioctl which can't be handled by + * the generic interface code. If not defined ioctl's return + * not supported error code. + * + * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); + * Used to set network devices bus interface parameters. 
This interface + * is retained for legacy reason, new devices should use the bus + * interface (PCI) for low level management. + * + * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + * Called when a user wants to change the Maximum Transfer Unit + * of a device. If not defined, any request to change MTU will + * will return an error. + * + * void (*ndo_tx_timeout) (struct net_device *dev); + * Callback uses when the transmitter has not made any progress + * for dev->watchdog ticks. + * + * struct net_device_stats* (*get_stats)(struct net_device *dev); + * Called when a user wants to get the network device usage + * statistics. If not defined, the counters in dev->stats will + * be used. + * + * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp); + * If device support VLAN receive accleration + * (ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called + * when vlan groups for the device changes. Note: grp is NULL + * if no vlan's groups are being used. + * + * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is registered. + * + * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is unregistered. 
+ * + * void (*ndo_poll_controller)(struct net_device *dev); + */ +struct net_device_ops { + int (*ndo_init)(struct net_device *dev); + void (*ndo_uninit)(struct net_device *dev); + int (*ndo_open)(struct net_device *dev); + int (*ndo_stop)(struct net_device *dev); +#define HAVE_CHANGE_RX_FLAGS + void (*ndo_change_rx_flags)(struct net_device *dev, + int flags); +#define HAVE_SET_RX_MODE + void (*ndo_set_rx_mode)(struct net_device *dev); +#define HAVE_MULTICAST + void (*ndo_set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*ndo_set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_VALIDATE_ADDR + int (*ndo_validate_addr)(struct net_device *dev); +#define HAVE_PRIVATE_IOCTL + int (*ndo_do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*ndo_set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_CHANGE_MTU + int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*ndo_tx_timeout) (struct net_device *dev); + + struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + + void (*ndo_vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER +#define HAVE_NETDEV_POLL + void (*ndo_poll_controller)(struct net_device *dev); +#endif +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -498,11 +623,6 @@ struct net_device #ifdef CONFIG_NETPOLL struct list_head napi_list; #endif - - /* The device initialization function. Called only once. */ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ /* Net device features */ unsigned long features; @@ -546,15 +666,13 @@ struct net_device * for all in netdev_increment_features. 
*/ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex; int iflink; - - struct net_device_stats* (*get_stats)(struct net_device *dev); struct net_device_stats stats; #ifdef CONFIG_WIRELESS_EXT @@ -564,18 +682,13 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif + /* Management operations */ + const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; /* Hardware header description */ const struct header_ops *header_ops; - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. 
*/ @@ -634,7 +747,7 @@ struct net_device unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + because most packets are unicast) */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -648,6 +761,10 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* Map buffer to appropriate transmit queue */ + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* @@ -662,9 +779,6 @@ struct net_device int watchdog_timeo; /* used by dev_watchdog() */ struct timer_list watchdog_timer; -/* - * refcnt is a very hot point, so align it on SMP - */ /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; @@ -683,56 +797,14 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); + /* Called from unregister, can be used to call free_netdev */ + void (*destructor)(struct net_device *dev); - /* Pointers to interface service routines. 
*/ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ @@ -763,6 +835,38 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_COMPAT_NET_DEV_OPS + struct { + int (*init)(struct net_device *dev); + void (*uninit)(struct net_device *dev); + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + void (*change_rx_flags)(struct net_device *dev, + int flags); + void (*set_rx_mode)(struct net_device 
*dev); + void (*set_multicast_list)(struct net_device *dev); + int (*set_mac_address)(struct net_device *dev, + void *addr); + int (*validate_addr)(struct net_device *dev); + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*set_config)(struct net_device *dev, + struct ifmap *map); + int (*change_mtu)(struct net_device *dev, int new_mtu); + void (*tx_timeout) (struct net_device *dev); + struct net_device_stats* (*get_stats)(struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER + void (*poll_controller)(struct net_device *dev); +#endif +#endif + }; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:40:23 -0800 Subject: netdev: introduce dev_get_stats() In order for the network device ops get_stats call to be immutable, the handling of the default internal network device stats block has to be changed. Add a new helper function which replaces the old use of internal_get_stats. Note: change return code to make it clear that the caller should not go changing the returned statistics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9060f5f3517a..981a089d5149 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -864,9 +864,9 @@ struct net_device unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); -#endif #endif }; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1780,6 +1780,8 @@ extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); +extern const struct net_device_stats *dev_get_stats(struct net_device *dev); + extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -- cgit v1.2.3 From ccad637b0c57de1825ffd34c311bf71487545ac2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 22:42:31 -0800 Subject: netdev: expose ethernet address primitives When ethernet devices are converted, the function pointer setup by eth_setup() needs to be done during initialization. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- include/linux/etherdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 25d62e6e3290..0e5e97060034 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -41,6 +41,10 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh); extern void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); +extern int eth_mac_addr(struct net_device *dev, void *p); +extern int eth_change_mtu(struct net_device *dev, int new_mtu); +extern int eth_validate_addr(struct net_device *dev); + extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); -- cgit v1.2.3 From d214c7537bbf2f247991fb65b3420b0b3d712c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Nov 2008 00:49:27 -0800 Subject: filter: add SKF_AD_NLATTR_NEST to look for nested attributes SKF_AD_NLATTR allows us to find the first matching attribute in a stream of netlink attributes from one offset to the end of the netlink message. This is not suitable to look for a specific matching inside a set of nested attributes. For example, in ctnetlink messages, if we look for the CTA_V6_SRC attribute in a message that talks about an IPv4 connection, SKF_AD_NLATTR returns the offset of CTA_STATUS which has the same value as CTA_V6_SRC but outside the nest. To differentiate CTA_STATUS and CTA_V6_SRC, we would have to make assumptions on the size of the attribute and the usual offset, resulting in horrible BSF code. This patch adds SKF_AD_NLATTR_NEST, which is a variant of SKF_AD_NLATTR, that looks for an attribute inside the limits of a nested attribute, but not further. This patch validates that we have enough room to look for the nested attributes - based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6ea9aa9e853..1354aaf6abbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -122,7 +122,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_PKTTYPE 4 #define SKF_AD_IFINDEX 8 #define SKF_AD_NLATTR 12 -#define SKF_AD_MAX 16 +#define SKF_AD_NLATTR_NEST 16 +#define SKF_AD_MAX 20 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) -- cgit v1.2.3 From 0c19b0adb8dd33dbd10ff48e41971231c486855c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:08:29 -0800 Subject: netlink: avoid memset of 0 bytes sparse warning A netlink attribute padding of zero triggers this sparse warning: include/linux/netlink.h:245:8: warning: memset with byte count of 0 Avoid the memset when the size parameter is constant and requires no padding. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f3..51b09a1f46c3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -242,7 +242,8 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_flags = flags; nlh->nlmsg_pid = pid; nlh->nlmsg_seq = seq; - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) + memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } -- cgit v1.2.3 From 13d2a1d2b032de08d7dcab6a1edcd47802681f96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:10:00 -0800 Subject: pkt_sched: add DRR scheduler Add classful DRR scheduler as a more flexible replacement for SFQ. 
The main difference to the algorithm described in "Efficient Fair Queueing using Deficit Round Robin" is that this implementation doesn't drop packets from the longest queue on overrun because it's classful and limits are handled by each individual child qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 5d921fa91a5b..e3f133adba78 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -500,4 +500,20 @@ struct tc_netem_corrupt #define NETEM_DIST_SCALE 8192 +/* DRR */ + +enum +{ + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats +{ + u32 deficit; +}; + #endif -- cgit v1.2.3 From 018a7bf1e55000dd792194238c9043918d24d3dd Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 20 Nov 2008 15:59:56 +0100 Subject: netfilter: ip{,6}t_policy.h should include xp_policy.h It seems that all of the include/netfilter_{ipv4,ipv6}/{ipt,ip6t}_*.h which share constants include the corresponding include/netfilter/xp_*.h files. Neither ipt_policy.h nor ip6t_policy.h does. Make these consistent with the norm.
Signed-off-by: Andy Whitcroft Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv4/ipt_policy.h | 2 ++ include/linux/netfilter_ipv6/ip6t_policy.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h index b9478a255301..1037fb2cd206 100644 --- a/include/linux/netfilter_ipv4/ipt_policy.h +++ b/include/linux/netfilter_ipv4/ipt_policy.h @@ -1,6 +1,8 @@ #ifndef _IPT_POLICY_H #define _IPT_POLICY_H +#include <linux/netfilter/xt_policy.h> + #define IPT_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ipt_policy_flags */ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h index 6bab3163d2fb..b1c449d7ec89 100644 --- a/include/linux/netfilter_ipv6/ip6t_policy.h +++ b/include/linux/netfilter_ipv6/ip6t_policy.h @@ -1,6 +1,8 @@ #ifndef _IP6T_POLICY_H #define _IP6T_POLICY_H +#include <linux/netfilter/xt_policy.h> + #define IP6T_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ip6t_policy_flags */ -- cgit v1.2.3 From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:14:53 -0800 Subject: netdev: add more functions to netdevice ops This patch moves neigh_setup and hard_start_xmit into the network device ops structure. For bisection, fix all the previously converted drivers as well. Bonding driver took the biggest hit on this. Added a prefetch of the hard_start_xmit in the fast path to try and reduce any impact this would have. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 981a089d5149..d8fb23679ee3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,8 +454,8 @@ struct netdev_queue { /* * This structure defines the management hooks for network devices.
- * The following hooks can bed defined and are optonal (can be null) - * unless otherwise noted. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when network device is registered. @@ -475,6 +475,15 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * + * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * Called when a packet needs to be transmitted. + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Required can not be NULL. + * + * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); + * Called to decide which queue to when device supports multiple + * transmit queues. + * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscious is enabled. @@ -508,7 +517,7 @@ struct netdev_queue { * of a device. If not defined, any request to change MTU will * will return an error. * - * void (*ndo_tx_timeout) (struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev); * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. 
* @@ -538,6 +547,10 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); + int (*ndo_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*ndo_select_queue)(struct net_device *dev, + struct sk_buff *skb); #define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -557,8 +570,10 @@ struct net_device_ops { int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); #define HAVE_CHANGE_MTU - int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); - + int (*ndo_change_mtu)(struct net_device *dev, + int new_mtu); + int (*ndo_neigh_setup)(struct net_device *dev, + struct neigh_parms *); #define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); @@ -761,18 +776,12 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - /* Map buffer to appropriate transmit queue */ - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); /* These may be needed for future network-power-down code. 
*/ unsigned long trans_start; /* Time (in jiffies) of last Tx */ @@ -800,8 +809,6 @@ struct net_device /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); - #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif @@ -842,6 +849,10 @@ struct net_device void (*uninit)(struct net_device *dev); int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); void (*change_rx_flags)(struct net_device *dev, int flags); void (*set_rx_mode)(struct net_device *dev); @@ -854,6 +865,8 @@ struct net_device int (*set_config)(struct net_device *dev, struct ifmap *map); int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, + struct neigh_parms *); void (*tx_timeout) (struct net_device *dev); struct net_device_stats* (*get_stats)(struct net_device *dev); void (*vlan_rx_register)(struct net_device *dev, -- cgit v1.2.3 From 145186a39570244aead77dc2efc559e5cac90548 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:29:48 -0800 Subject: fddi: convert to new network device ops Similar to ethernet. Convert infrastructure and the one lone FDDI driver (for the one lone user of that hardware??). Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/fddidevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index e61e42dfd317..155bafd9e886 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -27,6 +27,7 @@ #ifdef __KERNEL__ extern __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); +extern int fddi_change_mtu(struct net_device *dev, int new_mtu); extern struct net_device *alloc_fddidev(int sizeof_priv); #endif -- cgit v1.2.3 From 748ff68fad9600593c6abe47856037602bd5d133 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:32:15 -0800 Subject: hippi: convert driver to net_device_ops Convert the HIPPI infrastructure for use with net_device_ops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hippidevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index bab303dafd6e..f148e4908410 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,9 @@ struct hippi_cb { }; extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); - +extern int hippi_change_mtu(struct net_device *dev, int new_mtu); +extern int hippi_mac_addr(struct net_device *dev, void *p); +extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif -- cgit v1.2.3 From 2f90b8657ec942d1880f720e0177ee71df7c8e3c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 20:52:10 -0800 Subject: ixgbe: this patch adds support for DCB to the kernel and ixgbe driver This adds support for Data Center Bridging (DCB) features in the ixgbe driver and adds an rtnetlink interface for configuring DCB to the kernel. 
The DCB feature support included are Priority Grouping (PG) - which allows bandwidth guarantees to be allocated to groups to traffic based on the 802.1q priority, and Priority Based Flow Control (PFC) - which introduces a new MAC control PAUSE frame which works at granularity of the 802.1p priority instead of the link (IEEE 802.3x). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 230 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 8 ++ include/linux/rtnetlink.h | 5 + 3 files changed, 243 insertions(+) create mode 100644 include/linux/dcbnl.h (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h new file mode 100644 index 000000000000..32d32c1ee410 --- /dev/null +++ b/include/linux/dcbnl.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Lucy Liu + */ + +#ifndef __LINUX_DCBNL_H__ +#define __LINUX_DCBNL_H__ + +#define DCB_PROTO_VERSION 1 + +struct dcbmsg { + unsigned char dcb_family; + __u8 cmd; + __u16 dcb_pad; +}; + +/** + * enum dcbnl_commands - supported DCB commands + * + * @DCB_CMD_UNDEFINED: unspecified command to catch errors + * @DCB_CMD_GSTATE: request the state of DCB in the device + * @DCB_CMD_SSTATE: set the state of DCB in the device + * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx + * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx + * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx + * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx + * @DCB_CMD_PFC_GCFG: request the priority flow control configuration + * @DCB_CMD_PFC_SCFG: set the priority flow control configuration + * @DCB_CMD_SET_ALL: apply all changes to the underlying device + * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying + * device. Only useful when using bonding. 
+ */ +enum dcbnl_commands { + DCB_CMD_UNDEFINED, + + DCB_CMD_GSTATE, + DCB_CMD_SSTATE, + + DCB_CMD_PGTX_GCFG, + DCB_CMD_PGTX_SCFG, + DCB_CMD_PGRX_GCFG, + DCB_CMD_PGRX_SCFG, + + DCB_CMD_PFC_GCFG, + DCB_CMD_PFC_SCFG, + + DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + + __DCB_CMD_ENUM_MAX, + DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, +}; + + +/** + * enum dcbnl_attrs - DCB top-level netlink attributes + * + * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING) + * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8) + * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8) + * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED) + * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8) + * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) + * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) + * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + */ +enum dcbnl_attrs { + DCB_ATTR_UNDEFINED, + + DCB_ATTR_IFNAME, + DCB_ATTR_STATE, + DCB_ATTR_PFC_STATE, + DCB_ATTR_PFC_CFG, + DCB_ATTR_NUM_TC, + DCB_ATTR_PG_CFG, + DCB_ATTR_SET_ALL, + DCB_ATTR_PERM_HWADDR, + + __DCB_ATTR_ENUM_MAX, + DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs + * + * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8) + * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8) + * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8) + * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8) + * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8) + * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8) + * @DCB_PFC_UP_ATTR_6: Priority Flow 
Control value for User Priority 6 (NLA_U8) + * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8) + * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined + * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG) + * + */ +enum dcbnl_pfc_up_attrs { + DCB_PFC_UP_ATTR_UNDEFINED, + + DCB_PFC_UP_ATTR_0, + DCB_PFC_UP_ATTR_1, + DCB_PFC_UP_ATTR_2, + DCB_PFC_UP_ATTR_3, + DCB_PFC_UP_ATTR_4, + DCB_PFC_UP_ATTR_5, + DCB_PFC_UP_ATTR_6, + DCB_PFC_UP_ATTR_7, + DCB_PFC_UP_ATTR_ALL, + + __DCB_PFC_UP_ATTR_ENUM_MAX, + DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pg_attrs - DCB Priority Group attributes + * + * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED) + * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth 
for Priority Group 5 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG) + * + */ +enum dcbnl_pg_attrs { + DCB_PG_ATTR_UNDEFINED, + + DCB_PG_ATTR_TC_0, + DCB_PG_ATTR_TC_1, + DCB_PG_ATTR_TC_2, + DCB_PG_ATTR_TC_3, + DCB_PG_ATTR_TC_4, + DCB_PG_ATTR_TC_5, + DCB_PG_ATTR_TC_6, + DCB_PG_ATTR_TC_7, + DCB_PG_ATTR_TC_MAX, + DCB_PG_ATTR_TC_ALL, + + DCB_PG_ATTR_BW_ID_0, + DCB_PG_ATTR_BW_ID_1, + DCB_PG_ATTR_BW_ID_2, + DCB_PG_ATTR_BW_ID_3, + DCB_PG_ATTR_BW_ID_4, + DCB_PG_ATTR_BW_ID_5, + DCB_PG_ATTR_BW_ID_6, + DCB_PG_ATTR_BW_ID_7, + DCB_PG_ATTR_BW_ID_MAX, + DCB_PG_ATTR_BW_ID_ALL, + + __DCB_PG_ATTR_ENUM_MAX, + DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_tc_attrs - DCB Traffic Class attributes + * + * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors + * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to + * Valid values are: 0-7 + * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map + * Some devices may not support changing the + * user priority map of a TC. 
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting + * 0 - none + * 1 - group strict + * 2 - link strict + * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and + * not configured to use link strict priority, + * this is the percentage of bandwidth of the + * priority group this traffic class belongs to + * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters + * + */ +enum dcbnl_tc_attrs { + DCB_TC_ATTR_PARAM_UNDEFINED, + + DCB_TC_ATTR_PARAM_PGID, + DCB_TC_ATTR_PARAM_UP_MAPPING, + DCB_TC_ATTR_PARAM_STRICT_PRIO, + DCB_TC_ATTR_PARAM_BW_PCT, + DCB_TC_ATTR_PARAM_ALL, + + __DCB_TC_ATTR_PARAM_ENUM_MAX, + DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, +}; + +/** + * enum dcb_general_attr_values - general DCB attribute values + * + * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported + * + */ +enum dcb_general_attr_values { + DCB_ATTR_VALUE_UNDEFINED = 0xff +}; + + +#endif /* __LINUX_DCBNL_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8fb23679ee3..6095af572dfd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,9 @@ #include #include +#ifdef CONFIG_DCBNL +#include +#endif struct vlan_group; struct ethtool_ops; @@ -843,6 +846,11 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; +#ifdef CONFIG_DCBNL + /* Data Center Bridging netlink ops */ + struct dcbnl_rtnl_ops *dcbnl_ops; +#endif + #ifdef CONFIG_COMPAT_NET_DEV_OPS struct { int (*init)(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2b3d51c6ec9c..e88f7058b3a1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -107,6 +107,11 @@ enum { RTM_GETADDRLABEL, #define RTM_GETADDRLABEL RTM_GETADDRLABEL + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; -- cgit v1.2.3 From 
46132188bf72e22ef097f16ed5c969ee8cea1e8b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:05:08 -0800 Subject: DCB: Add interface to query for the DCB capabilities of an device. Adds to the netlink interface for Data Center Bridging (DCB), allowing the DCB capabilities supported by a device to be queried. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 32d32c1ee410..13f0c638a695 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -43,6 +43,7 @@ struct dcbmsg { * @DCB_CMD_SET_ALL: apply all changes to the underlying device * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. + * @DCB_CMD_GCAP: request the DCB capabilities of the device */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -60,6 +61,7 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -78,6 +80,7 @@ enum dcbnl_commands { * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -90,6 +93,7 @@ enum dcbnl_attrs { DCB_ATTR_PG_CFG, DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, + DCB_ATTR_CAP, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -216,6 +220,39 @@ enum dcbnl_tc_attrs { DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, }; +/** + * enum dcbnl_cap_attrs - DCB Capability attributes + * + * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_CAP_ATTR_ALL: 
(NLA_FLAG) all capability parameters + * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups + * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control + * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to + * traffic class mapping + * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device + * can be configured to use for Priority Groups + * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device can be + * configured to use for Priority Flow Control + * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority + * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion + * Notification + */ +enum dcbnl_cap_attrs { + DCB_CAP_ATTR_UNDEFINED, + DCB_CAP_ATTR_ALL, + DCB_CAP_ATTR_PG, + DCB_CAP_ATTR_PFC, + DCB_CAP_ATTR_UP2TC, + DCB_CAP_ATTR_PG_TCS, + DCB_CAP_ATTR_PFC_TCS, + DCB_CAP_ATTR_GSP, + DCB_CAP_ATTR_BCN, + + __DCB_CAP_ATTR_ENUM_MAX, + DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, +}; /** * enum dcb_general_attr_values - general DCB attribute values * -- cgit v1.2.3 From 33dbabc4a7f7bd72313c73a3c199f31f3900336f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:08:19 -0800 Subject: DCB: Add interface to query # of TCs supported by device Adds interface for Data Center Bridging (DCB) to query (and set if supported) the number of traffic classes currently supported by the device for the two (DCB) features: priority groups (PG) and priority flow control (PFC). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. 
Miller --- include/linux/dcbnl.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 13f0c638a695..1077fba1dadc 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -44,6 +44,8 @@ struct dcbmsg { * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. * @DCB_CMD_GCAP: request the DCB capabilities of the device + * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported + * @DCB_CMD_SNUMTCS: set the number of traffic classes */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,6 +64,8 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, + DCB_CMD_SNUMTCS, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -81,6 +85,7 @@ enum dcbnl_commands { * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) + * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -94,6 +99,7 @@ enum dcbnl_attrs { DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, + DCB_ATTR_NUMTCS, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -253,6 +259,27 @@ enum dcbnl_cap_attrs { __DCB_CAP_ATTR_ENUM_MAX, DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, }; + +/** + * enum dcbnl_numtcs_attrs - number of traffic classes + * + * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes + * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for + * priority groups + * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can + * support priority flow control + */ +enum dcbnl_numtcs_attrs { + DCB_NUMTCS_ATTR_UNDEFINED, + DCB_NUMTCS_ATTR_ALL, + 
DCB_NUMTCS_ATTR_PG, + DCB_NUMTCS_ATTR_PFC, + + __DCB_NUMTCS_ATTR_ENUM_MAX, + DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * -- cgit v1.2.3 From 0eb3aa9bab20217fb42244ccdcb5bf8a002f504c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:09:23 -0800 Subject: DCB: Add interface to query the state of PFC feature. Adds a netlink interface for Data Center Bridging (DCB) to get and set the enable state of the Priority Flow Control (PFC) feature. Primarily, this is a way to turn off PFC in the driver while DCB remains enabled. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 1077fba1dadc..6cc4560bc376 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -66,6 +66,8 @@ enum dcbnl_commands { DCB_CMD_GCAP, DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, + DCB_CMD_PFC_SSTATE, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, -- cgit v1.2.3 From 859ee3c43812051e21816c6d6d4cc04fb7ce9b2e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:10:23 -0800 Subject: DCB: Add support for DCB BCN Adds an interface to configure the Backward Congestion Notification (BCN) feature. In a BCN-capable network, congestion notifications from congested points out in the network can cause the end station to limit the rate of a given traffic flow. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S.
Miller --- include/linux/dcbnl.h | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 6cc4560bc376..e73a61449ad6 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -46,6 +46,8 @@ struct dcbmsg { * @DCB_CMD_GCAP: request the DCB capabilities of the device * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported * @DCB_CMD_SNUMTCS: set the number of traffic classes + * @DCB_CMD_GBCN: set backward congestion notification configuration + * @DCB_CMD_SBCN: get backward congestion notification configration. */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,18 +64,24 @@ enum dcbnl_commands { DCB_CMD_PFC_SCFG, DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, DCB_CMD_PFC_SSTATE, + DCB_CMD_BCN_GCFG, + DCB_CMD_BCN_SCFG, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; - /** * enum dcbnl_attrs - DCB top-level netlink attributes * @@ -88,6 +96,7 @@ enum dcbnl_commands { * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) + * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -102,6 +111,7 @@ enum dcbnl_attrs { DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, DCB_ATTR_NUMTCS, + DCB_ATTR_BCN, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -282,6 +292,38 @@ enum dcbnl_numtcs_attrs { DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, }; +enum dcbnl_bcn_attrs{ + DCB_BCN_ATTR_UNDEFINED = 0, + + DCB_BCN_ATTR_RP_0, + DCB_BCN_ATTR_RP_1, + DCB_BCN_ATTR_RP_2, + DCB_BCN_ATTR_RP_3, + DCB_BCN_ATTR_RP_4, + DCB_BCN_ATTR_RP_5, + DCB_BCN_ATTR_RP_6, + DCB_BCN_ATTR_RP_7, + DCB_BCN_ATTR_RP_ALL, + + DCB_BCN_ATTR_ALPHA, + 
DCB_BCN_ATTR_BETA, + DCB_BCN_ATTR_GD, + DCB_BCN_ATTR_GI, + DCB_BCN_ATTR_TMAX, + DCB_BCN_ATTR_TD, + DCB_BCN_ATTR_RMIN, + DCB_BCN_ATTR_W, + DCB_BCN_ATTR_RD, + DCB_BCN_ATTR_RU, + DCB_BCN_ATTR_WRTT, + DCB_BCN_ATTR_RI, + DCB_BCN_ATTR_C, + DCB_BCN_ATTR_ALL, + + __DCB_BCN_ATTR_ENUM_MAX, + DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * -- cgit v1.2.3 From 2baf8a2daab65cdd3f20bfeb4676a2f6aff7c3bf Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Fri, 21 Nov 2008 16:34:18 -0800 Subject: netdevice hdlc: Convert directly reference of netdev->priv To eliminate direct references to netdev->priv, use netdev->ml_priv to replace it, because the private pvc data comes from add_pvc() and can't be allocated in alloc_netdev(). Signed-off-by: Wang Chen Acked-by: Krzysztof Halasa Signed-off-by: David S. Miller --- include/linux/hdlc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index c59769693bee..e960faac609d 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -80,7 +80,7 @@ struct net_device *alloc_hdlcdev(void *priv); static inline struct hdlc_device* dev_to_hdlc(struct net_device *dev) { - return dev->priv; + return netdev_priv(dev); } static __inline__ void debug_frame(const struct sk_buff *skb) -- cgit v1.2.3 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a greater depth of function calls. But it could be unsafe to store these return addresses to the stack. So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated.
Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 +++++ include/linux/sched.h | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e128..2ba259b2defa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db464206..bee1e93c95ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. 
+ */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called. - * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) -- cgit v1.2.3 From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 09:18:56 +0100 Subject: tracing/function-return-tracer: clean up task start/exit callbacks Impact: cleanup Eliminate #ifdefs in core code by using empty inline functions. 
Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ba259b2defa..938ca1942641 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void); extern void ftrace_retfunc_init_task(struct task_struct *t); extern void ftrace_retfunc_exit_task(struct task_struct *t); +#else +static inline void ftrace_retfunc_init_task(struct task_struct *t) { } +static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). 
Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5c..68de51468f5d 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif -- cgit v1.2.3 From 74e2f334f4440cbcb63e9ebbcdcea430d41bdfa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: vfs, seqfile: make mangle_path() global MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: expose new VFS API make mangle_path() available, as per the suggestions of Christoph Hellwig and Al Viro: http://lkml.org/lkml/2008/11/4/338 Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dc50bcc282a8..b3dfa72f13b9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -34,6 +34,7 @@ struct seq_operations { #define SEQ_SKIP 1 +char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); -- cgit v1.2.3 From 
42f565e116e0408b5ddc21a33c4a4d41fd572420 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Nov 2008 23:57:47 -0500 Subject: trace: remove extra assign in branch check Impact: clean up of branch check The unlikely/likely profiler does an extra assign of the f.line. This is not needed since it is already calculated at compile time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c7d804a7a4d6..c25e525121f0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -87,7 +87,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = likely_notrace(x); \ ftrace_likely_update(&______f, ______r, 1); \ ______r; \ @@ -102,7 +101,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = unlikely_notrace(x); \ ftrace_likely_update(&______f, ______r, 0); \ ______r; \ -- cgit v1.2.3 From 45b797492a0758e64dff74e9db70e1f65e0603a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 00:40:40 -0500 Subject: trace: consolidate unlikely and likely profiler Impact: clean up to make one profiler of likely and unlikely tracer The likely and unlikely profiler prints out the file and line numbers of the annotated branches that it is profiling. It shows the number of times it was correct or incorrect in its guess. Having two different files or sections for that matter to tell us if it was a likely or unlikely is pretty pointless. We really only care if it was correct or not.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c25e525121f0..0628a2013fae 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -77,32 +77,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) -#define likely_check(x) ({ \ +#define __branch_check__(x, expect) ({ \ int ______r; \ static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_likely"))) \ + __attribute__((section("_ftrace_annotated_branch"))) \ ______f = { \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ }; \ ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 1); \ - ______r; \ - }) -#define unlikely_check(x) ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_unlikely"))) \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = unlikely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 0); \ + ftrace_likely_update(&______f, ______r, expect); \ ______r; \ }) @@ -112,10 +98,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); * written by Daniel Walker. */ # ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) # endif # ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# define unlikely(x) (__builtin_constant_p(x) ? 
!!(x) : __branch_check__(x, 0)) # endif #else # define likely(x) __builtin_expect(!!(x), 1) -- cgit v1.2.3 From 2bcd521a684cc94befbe2ce7d5b613c841b0d304 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 01:30:54 -0500 Subject: trace: profile all if conditionals Impact: feature to profile if statements This patch adds a branch profiler for all if () statements. The results will be found in: /debugfs/tracing/profile_branch For example: miss hit % Function File Line ------- --------- - -------- ---- ---- 0 1 100 x86_64_start_reservations head64.c 127 0 1 100 copy_bootdata head64.c 69 1 0 0 x86_64_start_kernel head64.c 111 32 0 0 set_intr_gate desc.h 319 1 0 0 reserve_ebda_region head.c 51 1 0 0 reserve_ebda_region head.c 47 0 1 100 reserve_ebda_region head.c 42 0 0 X maxcpus main.c 165 Miss means the branch was not taken. Hit means the branch was taken. The percent is the percentage the branch was taken. This adds a significant amount of overhead and should only be used by those analyzing their system. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0628a2013fae..ea7c6be354b7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -63,8 +63,16 @@ struct ftrace_branch_data { const char *func; const char *file; unsigned line; - unsigned long correct; - unsigned long incorrect; + union { + struct { + unsigned long correct; + unsigned long incorrect; + }; + struct { + unsigned long miss; + unsigned long hit; + }; + }; }; /* @@ -103,6 +111,32 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # ifndef unlikely # define unlikely(x) (__builtin_constant_p(x) ? 
!!(x) : __branch_check__(x, 0)) # endif + +#ifdef CONFIG_PROFILE_ALL_BRANCHES +/* + * "Define 'is'", Bill Clinton + * "Define 'if'", Steven Rostedt + */ +#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \ + ({ \ + int ______r; \ + static struct ftrace_branch_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_branch"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______r = !!(cond); \ + if (______r) \ + ______f.hit++; \ + else \ + ______f.miss++; \ + ______r; \ + })) +#endif /* CONFIG_PROFILE_ALL_BRANCHES */ + #else # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -- cgit v1.2.3 From 033601a32b2012b6948e80e739cca40bff4de4a0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:41:55 -0500 Subject: ring-buffer: add tracing_off_permanent Impact: feature to permanently disable ring buffer This patch adds a API to the ring buffer code that will permanently disable the ring buffer from ever recording. This should only be called when some serious anomaly is detected, and the system may be in an unstable state. When that happens, shutting down the recording to the ring buffers may be appropriate. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e097c2e6b6dc..3bb87a753fa3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); void tracing_on(void); void tracing_off(void); +void tracing_off_permanent(void); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, -- cgit v1.2.3 From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:59:38 -0500 Subject: ftrace: add ftrace_off_permanent Impact: add new API to disable all of ftrace on anomalies In case of a serious anomaly being detected (like something caught by lockdep) it is a good idea to disable all tracing immediately, without grabbing any locks. This patch adds ftrace_off_permanent that disables the tracers, function tracing and ring buffers without a way to enable them again. This should only be used when something serious has been detected. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e128..13e9cfc09928 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops; extern void tracing_start(void); extern void tracing_stop(void); +extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } static inline int ftrace_printk(const char *fmt, ...)
{ -- cgit v1.2.3 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 68de51468f5d..fd42d6851109 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif -- cgit v1.2.3 From 8d26487fd4ddda7a0237da418fb8669fb06ae557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:08 +0200 Subject: tracing/stack-tracer: introduce CONFIG_USER_STACKTRACE_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup User stack tracing is just implemented for x86, but it is not x86 specific. Introduce a generic config flag, that is currently enabled only for x86. When other arches implement it, they will have to SELECT USER_STACKTRACE_SUPPORT. 
Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index fd42d6851109..1a8cecc4f38c 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -16,7 +16,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern void print_stack_trace(struct stack_trace *trace, int spaces); -#ifdef CONFIG_X86 +#ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace_user(trace) do { } while (0) -- cgit v1.2.3 From b20a9c24d5c5d466d7e4a25c6f1bedbd2d16ad4f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 23 Nov 2008 16:02:31 -0800 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389ce04f4..6a72ff52a8a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 1f87e235e6fb92c2968b52b9191de04f1aff8e77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 23:24:32 -0800 Subject: eth: Declare an optimized compare_ether_addr_64bits() function Linus mentioned we could try to perform long word operations, even on potentially unaligned addresses, on x86 at least. David mentioned the HAVE_EFFICIENT_UNALIGNED_ACCESS test to handle this on all arches that have efficient unaligned accesses. I tried this idea and got nice assembly on 32 bits: 158: 33 82 38 01 00 00 xor 0x138(%edx),%eax 15e: 33 8a 34 01 00 00 xor 0x134(%edx),%ecx 164: c1 e0 10 shl $0x10,%eax 167: 09 c1 or %eax,%ecx 169: 74 0b je 176 And very nice assembly on 64 bits of course (one xor, one shl) Nice oprofile improvement in eth_type_trans(), 0.17 % instead of 0.41 %, expected since we remove 8 instructions on a fast path. This patch implements a compare_ether_addr_64bits() function, that uses the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ifdef to efficiently perform the 6 bytes comparison on all capable arches. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/etherdevice.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0e5e97060034..1cb0f0b90926 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef __KERNEL__ extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); @@ -140,6 +141,47 @@ static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) BUILD_BUG_ON(ETH_ALEN != 6); return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; } + +static inline unsigned long zap_last_2bytes(unsigned long value) +{ +#ifdef __BIG_ENDIAN + return value >> 16; +#else + return value << 16; +#endif +} + +/** + * compare_ether_addr_64bits - Compare two Ethernet addresses + * @addr1: Pointer to an array of 8 bytes + * @addr2: Pointer to an other array of 8 bytes + * + * Compare two ethernet addresses, returns 0 if equal. + * Same result than "memcmp(addr1, addr2, ETH_ALEN)" but without conditional + * branches, and possibly long word memory accesses on CPU allowing cheap + * unaligned memory reads. + * arrays = { byte1, byte2, byte3, byte4, byte6, byte7, pad1, pad2} + * + * Please note that alignment of addr1 & addr2 is only guaranted to be 16 bits. 
+ */ + +static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2], + const u8 addr2[6+2]) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long fold = ((*(unsigned long *)addr1) ^ + (*(unsigned long *)addr2)); + + if (sizeof(fold) == 8) + return zap_last_2bytes(fold) != 0; + + fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ + (*(unsigned long *)(addr2 + 4))); + return fold != 0; +#else + return compare_ether_addr(addr1, addr2); +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_ETHERDEVICE_H */ -- cgit v1.2.3 From 3eb1aa43ef5cb871ba3fb2f08633675eca374d2e Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Wed, 19 Nov 2008 16:58:50 -0500 Subject: Input: add support for Wacom W8001 penabled serial touchscreen The Wacom W8001 sensor is a sensor device (uses electromagnetic resonance) and it is interfaced via its serial microcontroller to the host. Signed-off-by: Jaya Kumar Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index 25641d9e0ea8..1bcb357a01a1 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -213,5 +213,6 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_ZHENHUA 0x36 #define SERIO_INEXIO 0x37 #define SERIO_TOUCHIT213 0x37 +#define SERIO_W8001 0x39 #endif -- cgit v1.2.3 From a2d781fc8d9b16113dd9440107d73c0f21d7cbef Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Nov 2008 17:02:24 -0500 Subject: Input: libps2 - handle 0xfc responses from devices Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index afc413369101..b94534b7e266 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -18,11 +18,13 @@ #define PS2_RET_ID 0x00 #define PS2_RET_ACK 0xfa #define PS2_RET_NAK 0xfe +#define PS2_RET_ERR 0xfc 
#define PS2_FLAG_ACK 1 /* Waiting for ACK/NAK */ #define PS2_FLAG_CMD 2 /* Waiting for command to finish */ #define PS2_FLAG_CMD1 4 /* Waiting for the first byte of command response */ #define PS2_FLAG_WAITID 8 /* Command execiting is GET ID */ +#define PS2_FLAG_NAK 16 /* Last transmission was NAKed */ struct ps2dev { struct serio *serio; -- cgit v1.2.3 From 758b2cdc6f6a22c702bd8f2344382fb1270b2161 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 25 Nov 2008 02:35:04 +1030 Subject: sched: wrap sched_group and sched_domain cpumask accesses. Impact: trivial wrap of member accesses This eases the transition in the next patch. We also get rid of a temporary cpumask in find_idlest_cpu() thanks to for_each_cpu_and, and sched_balance_self() due to getting weight before setting sd to NULL. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ce5c603c51a..2b95aa9f779b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -786,6 +786,11 @@ struct sched_group { u32 reciprocal_cpu_power; }; +static inline struct cpumask *sched_group_cpus(struct sched_group *sg) +{ + return &sg->cpumask; +} + enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -866,6 +871,11 @@ struct sched_domain { #endif }; +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return &sd->span; +} + extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -- cgit v1.2.3 From 6c99e9ad47d9c082bd096f42fb49e397b05d58a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 25 Nov 2008 02:35:04 +1030 Subject: sched: convert struct sched_group/sched_domain cpumask_ts to variable bitmaps Impact: (future) size reduction for large NR_CPUS. 
We move the 'cpumask' member of sched_group to the end, so when we kmalloc it we can do a minimal allocation: saves space for small nr_cpu_ids but big CONFIG_NR_CPUS. Similar trick for 'span' in sched_domain. This isn't quite as good as converting to a cpumask_var_t, as some sched_groups are actually static, but it's safer: we don't have to figure out where to call alloc_cpumask_var/free_cpumask_var. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b95aa9f779b..c5be6c6bc741 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -771,7 +771,6 @@ enum cpu_idle_type { struct sched_group { struct sched_group *next; /* Must be a circular list */ - cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -784,11 +783,13 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; + + unsigned long cpumask[]; }; static inline struct cpumask *sched_group_cpus(struct sched_group *sg) { - return &sg->cpumask; + return to_cpumask(sg->cpumask); } enum sched_domain_level { @@ -814,7 +815,6 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ - cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -869,11 +869,14 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + + /* span of all CPUs in this domain */ + unsigned long span[]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) { - return &sd->span; + 
return to_cpumask(sd->span); } extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, -- cgit v1.2.3 From 6a7b3dc3440f7b5a9b67594af01ed562cdeb41e4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 25 Nov 2008 02:35:04 +1030 Subject: sched: convert nohz_cpu_mask to cpumask_var_t. Impact: (future) size reduction for large NR_CPUS. Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves space for small nr_cpu_ids but big CONFIG_NR_CPUS. cpumask_var_t is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5be6c6bc741..1e33e2cb7f8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -249,7 +249,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_t nohz_cpu_mask; +extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else -- cgit v1.2.3 From 96f874e26428ab5d2db681c100210c254775e154 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 25 Nov 2008 02:35:14 +1030 Subject: sched: convert remaining old-style cpumask operators Impact: Trivial API conversion NR_CPUS -> nr_cpu_ids cpumask_t -> struct cpumask sizeof(cpumask_t) -> cpumask_size() cpumask_a = cpumask_b -> cpumask_copy(&cpumask_a, &cpumask_b) cpu_set() -> cpumask_set_cpu() first_cpu() -> cpumask_first() cpumask_of_cpu() -> cpumask_of() cpus_* -> cpumask_* There are some FIXMEs where we all archs to complete infrastructure (patches have been sent): cpu_coregroup_map -> cpu_coregroup_mask node_to_cpumask* -> cpumask_of_node There is also one FIXME where we pass an array of cpumasks to partition_sched_domains(): this implies knowing the 
definition of 'struct cpumask' and the size of a cpumask. This will be fixed in a future patch. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e33e2cb7f8c..4b7b0187374c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -879,7 +879,7 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, +extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); @@ -888,7 +888,7 @@ extern int arch_reinit_sched_domains(void); struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, cpumask_t *doms_new, +partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new) { } @@ -970,7 +970,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const cpumask_t *newmask); + const struct cpumask *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1612,12 +1612,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask); + const struct cpumask *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask) + const struct cpumask *new_mask) { - if (!cpu_isset(0, *new_mask)) + if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; return 0; } @@ -2230,8 +2230,8 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -extern long sched_getaffinity(pid_t pid, cpumask_t 
*mask); +extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); +extern long sched_getaffinity(pid_t pid, struct cpumask *mask); extern int sched_mc_power_savings, sched_smt_power_savings; -- cgit v1.2.3 From 1acdac104668a0834cfa267de9946fac7764d486 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Nov 2008 10:02:53 -0800 Subject: futex: make clock selectable for FUTEX_WAIT_BITSET FUTEX_WAIT_BITSET could be used instead of FUTEX_WAIT by setting the bit set to FUTEX_BITSET_MATCH_ANY, but FUTEX_WAIT uses CLOCK_REALTIME while FUTEX_WAIT_BITSET uses CLOCK_MONOTONIC. Add a flag to select CLOCK_REALTIME for FUTEX_WAIT_BITSET so glibc can replace the FUTEX_WAIT logic which needs to do gettimeofday() calls before and after the syscall to convert the absolute timeout to a relative timeout for FUTEX_WAIT. Signed-off-by: Thomas Gleixner Cc: Ulrich Drepper --- include/linux/futex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 8f627b9ae2b1..3bf5bb5a34f9 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -25,7 +25,8 @@ union ktime; #define FUTEX_WAKE_BITSET 10 #define FUTEX_PRIVATE_FLAG 128 -#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG +#define FUTEX_CLOCK_REALTIME 256 +#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) -- cgit v1.2.3 From 18b6e0414e42d95183f07d8177e3ff0241abd825 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Wed, 15 Oct 2008 16:38:45 -0500 Subject: User namespaces: set of cleanups (v2) The user_ns is moved from nsproxy to user_struct, so that a struct cred by itself is sufficient to determine access (which it otherwise would not be). Corresponding ecryptfs fixes (by David Howells) are here as well. Fix refcounting. The following rules now apply: 1. 
The task pins the user struct. 2. The user struct pins its user namespace. 3. The user namespace pins the struct user which created it. User namespaces are cloned during copy_creds(). Unsharing a new user_ns is no longer possible. (We could re-add that, but it'll cause code duplication and doesn't seem useful if PAM doesn't need to clone user namespaces). When a user namespace is created, its first user (uid 0) gets empty keyrings and a clean group_info. This incorporates a previous patch by David Howells. Here is his original patch description: >I suggest adding the attached incremental patch. It makes the following >changes: > > (1) Provides a current_user_ns() macro to wrap accesses to current's user > namespace. > > (2) Fixes eCryptFS. > > (3) Renames create_new_userns() to create_user_ns() to be more consistent > with the other associated functions and because the 'new' in the name is > superfluous. > > (4) Moves the argument and permission checks made for CLONE_NEWUSER to the > beginning of do_fork() so that they're done prior to making any attempts > at allocation. > > (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds > to fill in rather than have it return the new root user. I don't imagine > the new root user being used for anything other than filling in a cred > struct. > > This also permits me to get rid of a get_uid() and a free_uid(), as the > reference the creds were holding on the old user_struct can just be > transferred to the new namespace's creator pointer. > > (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under > preparation rather than doing it in copy_creds(). > >David >Signed-off-by: David Howells Changelog: Oct 20: integrate dhowells comments 1. leave thread_keyring alone 2. 
use current_user_ns() in set_user() Signed-off-by: Serge Hallyn --- include/linux/cred.h | 2 ++ include/linux/init_task.h | 1 - include/linux/nsproxy.h | 1 - include/linux/sched.h | 1 + include/linux/user_namespace.h | 13 ++++--------- 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 26c1ab179946..3282ee4318e7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -60,6 +60,7 @@ do { \ } while (0) extern struct group_info *groups_alloc(int); +extern struct group_info init_groups; extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); @@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) +#define current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) #define current_uid_gid(_uid, _gid) \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2597858035cd..959f5522d10a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy; .mnt_ns = NULL, \ INIT_NET_NS(net_ns) \ INIT_IPC_NS(ipc_ns) \ - .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index c8a768e59640..afad7dec1b36 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -27,7 +27,6 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; - struct user_namespace *user_ns; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2036e9f26020..7f8015a3082e 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -638,6 +638,7 @@ struct user_struct { /* Hash table maintenance information */ struct hlist_node uidhash_node; uid_t uid; + struct user_namespace *user_ns; #ifdef CONFIG_USER_SCHED struct task_group *tg; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4c2eec..315bcd375224 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -12,7 +12,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; - struct user_struct *root_user; + struct user_struct *creator; }; extern struct user_namespace init_user_ns; @@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return ns; } -extern struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns); +extern int create_user_ns(struct cred *new); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return &init_user_ns; } -static inline struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns) +static inline int create_user_ns(struct cred *new) { - if (flags & CLONE_NEWUSER) - return ERR_PTR(-EINVAL); - - return old_ns; + return -EINVAL; } static inline void put_user_ns(struct user_namespace *ns) -- cgit v1.2.3 From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 24 Nov 2008 21:20:15 -0800 Subject: tcp: Try to restore large SKBs while SACK processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During SACK processing, most of the benefits of TSO are eaten by the SACK blocks that one-by-one fragment SKBs to MSS sized chunks. Then we're in problems when cleanup work for them has to be done when a large cumulative ACK comes. 
Try to return back to pre-split state already while more and more SACK info gets discovered by combining newly discovered SACK areas with the previous skb if that's SACKed as well. This approach has a number of benefits: 1) The processing overhead is spread more equally over the RTT 2) Write queue has less skbs to process (affect everything which has to walk in the queue past the sacked areas) 3) Write queue is consistent the whole time, so no other parts of TCP has to be aware of this (this was not the case with some other approach that was, well, quite intrusive all around). 4) Clean_rtx_queue can release most of the pages using single put_page instead of previous PAGE_SIZE/mss+1 calls In case a hole is fully filled by the new SACK block, we attempt to combine the next skb too which allows construction of skbs that are even larger than what tso split them to and it handles hole per on every nth patterns that often occur during slow start overshoot pretty nicely. Though this to be really useful also a retransmission would have to get lost since cumulative ACKs advance one hole at a time in the most typical case. TODO: handle upwards only merging. That should be rather easy when segment is fully sacked but I'm leaving that as future work item (it won't make very large difference anyway since this current approach already covers quite a lot of normal cases). I was earlier thinking of some sophisticated way of tracking timestamps of the first and the last segment but later on realized that it won't be that necessary at all to store the timestamp of the last segment. The cases that can occur are basically either: 1) ambiguous => no sensible measurement can be taken anyway 2) non-ambiguous is due to reordering => having the timestamp of the last segment there is just skewing things more off than does some good since the ack got triggered by one of the holes (besides some subtle issues that would make determining right hole/skb even harder problem).
Anyway, it has nothing to do with this change then. I choose to route some abnormal looking cases with goto noop, some could be handled differently (eg., by stopping the walking at that skb but again). In general, they either shouldn't happen at all or are rare enough to make no difference in practice. In theory this change (as whole) could cause some macroscale regression (global) because of cache misses that are taken over the round-trip time but it gets very likely better because of much less (local) cache misses per other write queue walkers and the big recovery clearing cumulative ack. Worth to note that these benefits would be very easy to get also without TSO/GSO being on as long as the data is in pages so that we can merge them. Currently I won't let that happen because DSACK splitting at fragment that would mess up pcounts due to sk_can_gso in tcp_set_skb_tso_segs. Once DSACKs fragments gets avoided, we have some conditions that can be made less strict. TODO: I will probably have to convert the excessive pointer passing to struct sacktag_state... :-) My testing revealed that considerable amount of skbs couldn't be shifted because they were cloned (most likely still awaiting tx reclaim)... [The rest is considering future work instead since I got repeatably EFAULT to tcpdump's recvfrom when I added pskb_expand_head to deal with clones, so I separated that into another, later patch] ...To counter that, I gave up on the fifth advantage: 5) When growing previous SACK block, less allocs for new skbs are done, basically a new alloc is needed only when new hole is detected and when the previous skb runs out of frags space ...which now only happens if reclaim is fast enough to dispose the clone before the SACK block comes in (the window is RTT long), otherwise we'll have to alloc some.
With clones being handled I got these numbers (will be somewhat worse without that), taken with fine-grained mibs: TCPSackShifted 398 TCPSackMerged 877 TCPSackShiftFallback 320 TCPSACKCOLLAPSEFALLBACKGSO 0 TCPSACKCOLLAPSEFALLBACKSKBBITS 0 TCPSACKCOLLAPSEFALLBACKSKBDATA 0 TCPSACKCOLLAPSEFALLBACKBELOW 0 TCPSACKCOLLAPSEFALLBACKFIRST 1 TCPSACKCOLLAPSEFALLBACKPREVBITS 318 TCPSACKCOLLAPSEFALLBACKMSS 1 TCPSACKCOLLAPSEFALLBACKNOHEAD 0 TCPSACKCOLLAPSEFALLBACKSHIFT 0 TCPSACKCOLLAPSENOOPSEQ 0 TCPSACKCOLLAPSENOOPSMALLPCOUNT 0 TCPSACKCOLLAPSENOOPSMALLLEN 0 TCPSACKCOLLAPSEHOLE 12 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f84e3bc..acf17af45af9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,6 +492,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_is_first - check if skb is the first entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the first buffer on the list. + */ +static inline bool skb_queue_is_first(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->prev == (struct sk_buff *) list); +} + /** * skb_queue_next - return the next packet in the queue * @list: queue head @@ -510,6 +523,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, return skb->next; } +/** + * skb_queue_prev - return the prev packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the prev packet in @list before @skb. It is only valid to + * call this if skb_queue_is_first() evaluates to false.
+ */ +static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_first(list, skb)); + return skb->prev; +} + /** * skb_get - reference buffer * @skb: buffer to reference @@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb, extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); -- cgit v1.2.3 From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 24 Nov 2008 21:27:22 -0800 Subject: tcp: add some mibs to track collapsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller --- include/linux/snmp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 7a6e6bba4a71..aee3f1e1d1ce 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -216,6 +216,9 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_SACKSHIFTED, + LINUX_MIB_SACKMERGED, + LINUX_MIB_SACKSHIFTFALLBACK, __LINUX_MIB_MAX }; -- cgit v1.2.3 From 14bfc987e395797dfe03e915e8b4c7fc9e5078e4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 25 Nov 2008 08:58:11 +0100 Subject: tracing, tty: fix warnings caused by branch tracing and tty_kref_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stephen Rothwell reported that this warning started triggering in linux-next: In file included from init/main.c:27: include/linux/tty.h: In function ‘tty_kref_get’: include/linux/tty.h:330: warning: ‘______f’ is static but declared in inline function ‘tty_kref_get’ which is not static Which gcc emits for 'extern inline' functions that nevertheless define static variables. Change it to 'static inline', which is the norm in the kernel anyway.
Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 3b8121d4e36f..eaec37c9d83d 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -325,7 +325,7 @@ extern struct class *tty_class; * go away */ -extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty) +static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); -- cgit v1.2.3 From 47fd5b8373ecc6bf5473e4139b62b06425448252 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 00:20:43 -0800 Subject: netdev: add HAVE_NET_DEVICE_OPS As a concession to vendors who have to deal with one source for different kernel versions, add a HAVE_NET_DEVICE_OPS so they don't end up hard coding ifdef against kernel version. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6095af572dfd..76a89f8e6a19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -545,6 +545,7 @@ struct netdev_queue { * * void (*ndo_poll_controller)(struct net_device *dev); */ +#define HAVE_NET_DEVICE_OPS struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); -- cgit v1.2.3 From 7a6b6f515f77d1c62a2f383b6dce18cb0af0cf4f Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Tue, 25 Nov 2008 01:02:08 -0800 Subject: DCB: fix kconfig option Since the netlink option for DCB is necessary to actually be useful, simplified the Kconfig option. In addition, added useful help text for the Kconfig option. Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76a89f8e6a19..0df0db068ac3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,7 +43,7 @@ #include #include -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB #include #endif @@ -847,7 +847,7 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ struct dcbnl_rtnl_ops *dcbnl_ops; #endif -- cgit v1.2.3 From ca109491f612aab5c8152207631c0444f63da97f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Nov 2008 12:43:51 +0100 Subject: hrtimer: removing all ur callback modes Impact: cleanup, move all hrtimer processing into hardirq context This is an attempt at removing some of the hrtimer complexity by reducing the number of callback modes to 1. This means that all hrtimer callback functions will be ran from HARD-irq context. I went through all the 30 odd hrtimer callback functions in the kernel and saw only one that I'm not quite sure of, which is the one in net/can/bcm.c - hence I'm CC-ing the folks responsible for that code. Furthermore, the hrtimer core now calls callbacks directly with IRQs disabled in case you try to enqueue an expired timer. If this timer is a periodic timer (which should use hrtimer_forward() to advance its time) then it might be possible to end up in an inf. recursive loop due to the fact that hrtimer_forward() doesn't round up to the next timer granularity, and therefore keeps on calling the callback - obviously this needs a fix. 
Aside from that, this seems to compile and actually boot on my dual core test box - although I'm sure there are some bugs in, me not hitting any makes me certain :-) Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 34 ++-------------------------------- include/linux/interrupt.h | 3 --- 2 files changed, 2 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3eba43878dcb..bd37078c2d7d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -42,26 +42,6 @@ enum hrtimer_restart { HRTIMER_RESTART, /* Timer must be restarted */ }; -/* - * hrtimer callback modes: - * - * HRTIMER_CB_SOFTIRQ: Callback must run in softirq context - * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context - * Special mode for tick emulation and - * scheduler timer. Such timers are per - * cpu and not allowed to be migrated on - * cpu unplug. - * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context - * with timer->base lock unlocked - * used for timers which call wakeup to - * avoid lock order problems with rq->lock - */ -enum hrtimer_cb_mode { - HRTIMER_CB_SOFTIRQ, - HRTIMER_CB_IRQSAFE_PERCPU, - HRTIMER_CB_IRQSAFE_UNLOCKED, -}; - /* * Values to track state of the timer * @@ -70,7 +50,6 @@ enum hrtimer_cb_mode { * 0x00 inactive * 0x01 enqueued into rbtree * 0x02 callback function running - * 0x04 callback pending (high resolution mode) * * Special cases: * 0x03 callback function running and enqueued @@ -92,8 +71,7 @@ enum hrtimer_cb_mode { #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 -#define HRTIMER_STATE_PENDING 0x04 -#define HRTIMER_STATE_MIGRATE 0x08 +#define HRTIMER_STATE_MIGRATE 0x04 /** * struct hrtimer - the basic hrtimer structure @@ -109,8 +87,6 @@ enum hrtimer_cb_mode { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) 
* @state: state information (See bit values above) - * @cb_mode: high resolution timer feature to select the callback execution - * mode * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started @@ -129,7 +105,6 @@ struct hrtimer { struct hrtimer_clock_base *base; unsigned long state; struct list_head cb_entry; - enum hrtimer_cb_mode cb_mode; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -188,15 +163,11 @@ struct hrtimer_clock_base { * @check_clocks: Indictator, when set evaluate time source and clock * event devices whether high resolution mode can be * activated. - * @cb_pending: Expired timers are moved from the rbtree to this - * list in the timer interrupt. The list is processed - * in the softirq. * @nr_events: Total number of timer interrupt events */ struct hrtimer_cpu_base { spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; - struct list_head cb_pending; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -404,8 +375,7 @@ static inline int hrtimer_active(const struct hrtimer *timer) */ static inline int hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & - (HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING); + return timer->state & HRTIMER_STATE_ENQUEUED; } /* diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f58a0cf8929a..d6210a97a8ca 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -251,9 +251,6 @@ enum BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, -#ifdef CONFIG_HIGH_RES_TIMERS - HRTIMER_SOFTIRQ, -#endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS -- cgit v1.2.3 From ca0002a179bfa532d009a9272d619732872c49bd Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:01:25 +0100 Subject: x86, bts: base in-kernel ds interface on handles Impact: generalize the DS code to shared 
buffers Change the in-kernel ds.h interface to identify the tracer via a handle returned on ds_request_~(). Tracers used to be identified via their task_struct. The changes are required to allow DS to be shared between different tasks, which is needed for perfmon2 and for ftrace. For ptrace, the handle is stored in the traced task's task_struct. This should probably go into a (arch-specific) ptrace context some time. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bee1e93c95ad..a9780eaa6737 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -96,6 +96,7 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; +struct bts_tracer; /* * List of flags we want to share for kernel threads, @@ -1161,6 +1162,14 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; +#ifdef CONFIG_X86_PTRACE_BTS + /* + * This is the tracer handle for the ptrace BTS extension. + * This field actually belongs to the ptracer task. + */ + struct bts_tracer *bts; +#endif /* CONFIG_X86_PTRACE_BTS */ + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; -- cgit v1.2.3 From 6abb11aecd888d1da6276399380b7355f127c006 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:05:27 +0100 Subject: x86, bts, ptrace: move BTS buffer allocation from ds.c into ptrace.c Impact: restructure DS memory allocation to be done by the usage site of DS Require pre-allocated buffers in ds.h. Move the BTS buffer allocation for ptrace into ptrace.c. The pointer to the allocated buffer is stored in the traced task's task_struct together with the handle returned by ds_request_bts(). Removes memory accounting code. 
Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a9780eaa6737..d02a0ca70ee9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1168,6 +1168,10 @@ struct task_struct { * This field actually belongs to the ptracer task. */ struct bts_tracer *bts; + /* + * The buffer to hold the BTS data. + */ + void *bts_buffer; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ -- cgit v1.2.3 From 3f2355cb9111ac04e7ae06a4d7044da2ae813863 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Nov 2008 14:22:02 -0800 Subject: cfg80211/mac80211: Add 802.11d support This adds country IE parsing to mac80211 and enables its usage within the new regulatory infrastructure in cfg80211. We parse the country IEs only on management beacons for the BSSID you are associated to and disregard the IEs when the country and environment (indoor, outdoor, any) matches the already processed country IE. To avoid following misinformed or outdated APs we build and use a regulatory domain out of the intersection between what the AP provides us on the country IE and what CRDA is aware is allowed on the same country. A secondary device is allowed to follow only the same country IE as it make no sense for two devices on a system to be in two different countries. In the case the AP is using country IEs for an incorrect country the user may help compliance further by setting the regulatory domain before or after the IE is parsed and in that case another intersection will be performed. CONFIG_WIRELESS_OLD_REGULATORY is supported but requires CRDA present. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 56b0eb25d927..a6ec928186ad 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1042,6 +1042,68 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* + * IEEE 802.11-2007 7.3.2.9 Country information element + * + * Minimum length is 8 octets, ie len must be evenly + * divisible by 2 + */ + +/* Although the spec says 8 I'm seeing 6 in practice */ +#define IEEE80211_COUNTRY_IE_MIN_LEN 6 + +/* + * For regulatory extension stuff see IEEE 802.11-2007 + * Annex I (page 1141) and Annex J (page 1147). Also + * review 7.3.2.9. + * + * When dot11RegulatoryClassesRequired is true and the + * first_channel/reg_extension_id is >= 201 then the IE + * compromises of the 'ext' struct represented below: + * + * - Regulatory extension ID - when generating IE this just needs + * to be monotonically increasing for each triplet passed in + * the IE + * - Regulatory class - index into set of rules + * - Coverage class - index into air propagation time (Table 7-27), + * in microseconds, you can compute the air propagation time from + * the index by multiplying by 3, so index 10 yields a propagation + * of 10 us. Valid values are 0-31, values 32-255 are not defined + * yet. A value of 0 inicates air propagation of <= 1 us. + * + * See also Table I.2 for Emission limit sets and table + * I.3 for Behavior limit sets. Table J.1 indicates how to map + * a reg_class to an emission limit set and behavior limit set. + */ +#define IEEE80211_COUNTRY_EXTENSION_ID 201 + +/* + * Channels numbers in the IE must be monotonically increasing + * if dot11RegulatoryClassesRequired is not true. 
+ * + * If dot11RegulatoryClassesRequired is true consecutive + * subband triplets following a regulatory triplet shall + * have monotonically increasing first_channel number fields. + * + * Channel numbers shall not overlap. + * + * Note that max_power is signed. + */ +struct ieee80211_country_ie_triplet { + union { + struct { + u8 first_channel; + u8 num_channels; + s8 max_power; + } __attribute__ ((packed)) chans; + struct { + u8 reg_extension_id; + u8 reg_class; + u8 coverage_class; + } __attribute__ ((packed)) ext; + }; +} __attribute__ ((packed)); + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, -- cgit v1.2.3 From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 24 ++++++++++++------------ include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b2..b4ac734ad8d6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -#ifdef CONFIG_FUNCTION_RET_TRACER -extern void ftrace_return_caller(void); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern void ftrace_graph_caller(void); #endif /** @@ -315,7 +315,7 @@ ftrace_init_module(struct module *mod, /* * Structure that defines a return function trace. 
*/ -struct ftrace_retfunc { +struct ftrace_graph_ret { unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; @@ -324,22 +324,22 @@ struct ftrace_retfunc { unsigned long overrun; }; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ -typedef void (*trace_function_return_t)(struct ftrace_retfunc *); +typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); -extern int register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_graph(trace_function_graph_t func); /* The current handler in use */ -extern trace_function_return_t ftrace_function_return; -extern void unregister_ftrace_return(void); +extern trace_function_graph_t ftrace_graph_function; +extern void unregister_ftrace_graph(void); -extern void ftrace_retfunc_init_task(struct task_struct *t); -extern void ftrace_retfunc_exit_task(struct task_struct *t); +extern void ftrace_graph_init_task(struct task_struct *t); +extern void ftrace_graph_exit_task(struct task_struct *t); #else -static inline void ftrace_retfunc_init_task(struct task_struct *t) { } -static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } +static inline void ftrace_graph_init_task(struct task_struct *t) { } +static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 0b4df55d7a74..366a054d0b05 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h 
b/include/linux/sched.h index d02a0ca70ee9..7ad48f2a2758 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1365,7 +1365,7 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored adress in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ -- cgit v1.2.3 From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Nov 2008 00:57:25 +0100 Subject: tracing/function-return-tracer: set a more human readable output Impact: feature This patch sets a C-like output for the function graph tracing. For this aim, we now call two handlers for each function: one on the entry and one other on return. This way we can draw a well-ordered call stack. The pid of the previous trace is loosely stored to be compared against the one of the current trace to see if there was a context switch. Without this little feature, the call tree would seem broken at some locations. We could use the sched_tracer to capture these sched_events but this way of processing is much simpler. 2 spaces have been chosen for indentation to fit the screen while deep calls. The time of execution in nanosecs is printed just after closed braces, it seems more easy this way to find the corresponding function. If the time was printed as a first column, it would be not so easy to find the corresponding function if it is called on a deep depth. I plan to output the return value but on 32 bits CPU, the return value can be 32 or 64, and it's difficult to guess on which case we are. I don't know what would be the better solution on X86-32: only print eax (low-part) or even edx (high-part). Actually it's the same problem when a function returns an 8-bit value, the high part of eax could contain junk values...
Here is an example of trace: sys_read() { fget_light() { } 526 vfs_read() { rw_verify_area() { security_file_permission() { cap_file_permission() { } 519 } 1564 } 2640 do_sync_read() { pipe_read() { __might_sleep() { } 511 pipe_wait() { prepare_to_wait() { } 760 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { update_min_vruntime() { } 504 } 1587 clear_buddies() { } 512 add_cfs_task_weight() { } 519 update_min_vruntime() { } 511 } 5602 dequeue_entity() { update_curr() { update_min_vruntime() { } 496 } 1631 clear_buddies() { } 496 update_min_vruntime() { } 527 } 4580 hrtick_update() { hrtick_start_fair() { } 488 } 1489 } 13700 } 14949 } 16016 msecs_to_jiffies() { } 496 put_prev_task_fair() { } 504 pick_next_task_fair() { } 489 pick_next_task_rt() { } 496 pick_next_task_fair() { } 489 pick_next_task_idle() { } 489 ------------8<---------- thread 4 ------------8<---------- finish_task_switch() { } 1203 do_softirq() { __do_softirq() { __local_bh_disable() { } 669 rcu_process_callbacks() { __rcu_process_callbacks() { cpu_quiet() { rcu_start_batch() { } 503 } 1647 } 3128 __rcu_process_callbacks() { } 542 } 5362 _local_bh_enable() { } 587 } 8880 } 9986 kthread_should_stop() { } 669 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { calc_delta_mine() { } 511 update_min_vruntime() { } 511 } 2813 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4ac734ad8d6..fc2d54987198 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -312,27 +312,40 @@ ftrace_init_module(struct module *mod, #endif +/* + * Structure that defines an entry function trace. 
+ */ +struct ftrace_graph_ent { + unsigned long func; /* Current function */ + int depth; +}; + /* * Structure that defines a return function trace. */ struct ftrace_graph_ret { - unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; /* Number of functions that overran the depth limit for current task */ unsigned long overrun; + int depth; }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of a callback handler of tracing return function */ -typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + +extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc); + +/* The current handlers in use */ +extern trace_func_graph_ret_t ftrace_graph_return; +extern trace_func_graph_ent_t ftrace_graph_entry; -extern int register_ftrace_graph(trace_function_graph_t func); -/* The current handler in use */ -extern trace_function_graph_t ftrace_graph_function; extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); -- cgit v1.2.3 From 5a45cfe1c64862e8cd3b0d79d7c4ba71c3118915 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:24 -0500 Subject: ftrace: use code patching for ftrace graph tracer Impact: more efficient code for ftrace graph tracer This patch uses the dynamic patching, when available, to patch the function graph code into the kernel. This patch will ease the way for letting both function tracing and function graph tracing run together. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fc2d54987198..f9792c0d73f6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -117,6 +117,11 @@ extern void ftrace_call(void); extern void mcount_call(void); #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); +extern int ftrace_enable_ftrace_graph_caller(void); +extern int ftrace_disable_ftrace_graph_caller(void); +#else +static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; } +static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** -- cgit v1.2.3 From f3f47a6768a29448866da4422b6f6bee485c947f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 23 Nov 2008 16:49:58 -0800 Subject: tracing: add "power-tracer": C/P state tracer to help power optimization Impact: new "power-tracer" ftrace plugin This patch adds a C/P-state ftrace plugin that will generate detailed statistics about the C/P-states that are being used, so that we can look at detailed decisions that the C/P-state code is making, rather than the too high level "average" that we have today. 
An example way of using this is: mount -t debugfs none /sys/kernel/debug echo cstate > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_enabled sleep 1 echo 0 > /sys/kernel/debug/tracing/tracing_enabled cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b2..0df288666201 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -311,6 +311,35 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { +#ifdef CONFIG_POWER_TRACER + ktime_t stamp; + ktime_t end; + int type; + int state; +#endif +}; + +#ifdef CONFIG_POWER_TRACER +extern void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_end(struct power_trace *it); +#else +static inline void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_end(struct power_trace *it) { } +#endif + /* * Structure that defines a return function trace. -- cgit v1.2.3 From 193da6092764ab693da7170c5badbf60d7758c1d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:54 +0100 Subject: fuse: move FUSE_MINOR to miscdevice.h Move FUSE_MINOR to miscdevice.h. While at it, de-uglify the file. 
Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 3 --- include/linux/miscdevice.h | 42 +++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 350fe9767bbc..7caa473306e4 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -40,9 +40,6 @@ /** The major number of the fuse character device */ #define FUSE_MAJOR MISC_MAJOR -/** The minor number of the fuse character device */ -#define FUSE_MINOR 229 - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 26433ec520b3..a820f816a49e 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -3,33 +3,33 @@ #include #include -#define PSMOUSE_MINOR 1 -#define MS_BUSMOUSE_MINOR 2 -#define ATIXL_BUSMOUSE_MINOR 3 -/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ -#define ATARIMOUSE_MINOR 5 -#define SUN_MOUSE_MINOR 6 -#define APOLLO_MOUSE_MINOR 7 -#define PC110PAD_MINOR 9 -/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ +#define PSMOUSE_MINOR 1 +#define MS_BUSMOUSE_MINOR 2 +#define ATIXL_BUSMOUSE_MINOR 3 +/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ +#define ATARIMOUSE_MINOR 5 +#define SUN_MOUSE_MINOR 6 +#define APOLLO_MOUSE_MINOR 7 +#define PC110PAD_MINOR 9 +/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ #define WATCHDOG_MINOR 130 /* Watchdog timer */ #define TEMP_MINOR 131 /* Temperature Sensor */ -#define RTC_MINOR 135 +#define RTC_MINOR 135 #define EFI_RTC_MINOR 136 /* EFI Time services */ -#define SUN_OPENPROM_MINOR 139 +#define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* DMAPI */ -#define NVRAM_MINOR 144 -#define SGI_MMTIMER 153 +#define NVRAM_MINOR 144 +#define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 -#define I2O_MINOR 166 +#define I2O_MINOR 166 #define MICROCODE_MINOR 184 -#define MWAVE_MINOR 219 
/* ACP/Mwave Modem */ -#define MPT_MINOR 220 -#define MISC_DYNAMIC_MINOR 255 - -#define TUN_MINOR 200 -#define HPET_MINOR 228 -#define KVM_MINOR 232 +#define TUN_MINOR 200 +#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ +#define MPT_MINOR 220 +#define HPET_MINOR 228 +#define FUSE_MINOR 229 +#define KVM_MINOR 232 +#define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From 59efec7b903987dcb60b9ebc85c7acd4443a11a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement ioctl support Generic ioctl support is tricky to implement because only the ioctl implementation itself knows which memory regions need to be read and/or written. To support this, fuse client can request retry of ioctl specifying memory regions to read and write. Deep copying (nested pointers) can be implemented by retrying multiple times resolving one depth of dereference at a time. For security and cleanliness considerations, ioctl implementation has restricted mode where the kernel determines data transfer directions and sizes using the _IOC_*() macros on the ioctl command. In this mode, retry is not allowed. For all FUSE servers, restricted mode is enforced. Unrestricted ioctl will be used by CUSE. Please read the comment on top of fs/fuse/file.c::fuse_file_do_ioctl() for more information. 
Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 7caa473306e4..608e300ae883 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -148,6 +148,21 @@ struct fuse_file_lock { */ #define FUSE_READ_LOCKOWNER (1 << 1) +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) + +#define FUSE_IOCTL_MAX_IOV 256 + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -185,6 +200,7 @@ enum fuse_opcode { FUSE_INTERRUPT = 36, FUSE_BMAP = 37, FUSE_DESTROY = 38, + FUSE_IOCTL = 39, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -385,6 +401,22 @@ struct fuse_bmap_out { __u64 block; }; +struct fuse_ioctl_in { + __u64 fh; + __u32 flags; + __u32 cmd; + __u64 arg; + __u32 in_size; + __u32 out_size; +}; + +struct fuse_ioctl_out { + __s32 result; + __u32 flags; + __u32 in_iovs; + __u32 out_iovs; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3 From 8599396b5062bf6bd2a0b433503849e2322df1c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement unsolicited notification Clients always used to write only in response to read requests. To implement poll efficiently, clients should be able to issue unsolicited notifications. This patch implements basic notification support. Zero fuse_out_header.unique is now accepted and considered unsolicited notification and the error field contains notification code. 
This patch doesn't implement any actual notification. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 608e300ae883..abde9949e2c0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -203,6 +203,10 @@ enum fuse_opcode { FUSE_IOCTL = 39, }; +enum fuse_notify_code { + FUSE_NOTIFY_CODE_MAX, +}; + /* The read buffer is required to be at least 8k, but may be much larger */ #define FUSE_MIN_READ_BUFFER 8192 -- cgit v1.2.3 From 95668a69a4bb862063c4d28a746e55107dee7b98 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Nov 2008 12:03:55 +0100 Subject: fuse: implement poll support Implement poll support. Polled files are indexed using kh in a RB tree rooted at fuse_conn->polled_files. Client should send FUSE_NOTIFY_POLL notification once after processing FUSE_POLL which has FUSE_POLL_SCHEDULE_NOTIFY set. Sending notification unconditionally after the latest poll or everytime file content might have changed is inefficient but won't cause malfunction. fuse_file_poll() can sleep and requires patches from the following thread which allows f_op->poll() to sleep. 
http://thread.gmane.org/gmane.linux.kernel/726176 Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index abde9949e2c0..5650cf033e73 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -163,6 +163,13 @@ struct fuse_file_lock { #define FUSE_IOCTL_MAX_IOV 256 +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -201,9 +208,11 @@ enum fuse_opcode { FUSE_BMAP = 37, FUSE_DESTROY = 38, FUSE_IOCTL = 39, + FUSE_POLL = 40, }; enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, FUSE_NOTIFY_CODE_MAX, }; @@ -421,6 +430,22 @@ struct fuse_ioctl_out { __u32 out_iovs; }; +struct fuse_poll_in { + __u64 fh; + __u64 kh; + __u32 flags; + __u32 padding; +}; + +struct fuse_poll_out { + __u32 revents; + __u32 padding; +}; + +struct fuse_notify_poll_wakeup_out { + __u64 kh; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3 From 5f3ea37c7716db4e894a480e0c18b24399595b6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2008 08:34:33 +0100 Subject: blktrace: port to tracepoints This was a forward port of work done by Mathieu Desnoyers, I changed it to encode the 'what' parameter on the tracepoint name, so that one can register interest in specific events and not on classes of events to then check the 'what' parameter. 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jens Axboe Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 172 +------------------------------------------ 1 file changed, 3 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index bdf505d33e77..1dba3493d520 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -160,7 +160,6 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); } while (0) #define BLK_TN_MAX_MSG 128 -/** - * blk_add_trace_rq - Add a trace for a request oriented action - * @q: queue the io is for - * @rq: the source request - * @what: the action - * - * Description: - * Records an action against a request. Will log the bio offset + size. 
- * - **/ -static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - int rw = rq->cmd_flags & 0x03; - - if (likely(!bt)) - return; - - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); - - if (blk_pc_request(rq)) { - what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); - } else { - what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); - } -} - -/** - * blk_add_trace_bio - Add a trace for a bio oriented action - * @q: queue the io is for - * @bio: the source bio - * @what: the action - * - * Description: - * Records an action against a bio. Will log the bio offset + size. - * - **/ -static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); -} - -/** - * blk_add_trace_generic - Add a trace for a generic action - * @q: queue the io is for - * @bio: the source bio - * @rw: the data direction - * @what: the action - * - * Description: - * Records a simple trace - * - **/ -static inline void blk_add_trace_generic(struct request_queue *q, - struct bio *bio, int rw, u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (bio) - blk_add_trace_bio(q, bio, what); - else - __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); -} - -/** - * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload - * @q: queue the io is for - * @what: the action - * @bio: the source bio - * @pdu: the integer payload - * - * Description: - * Adds a trace with some integer payload. 
This might be an unplug - * option given as the action, with the depth at unplug time given - * as the payload - * - **/ -static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, - struct bio *bio, unsigned int pdu) -{ - struct blk_trace *bt = q->blk_trace; - __be64 rpdu = cpu_to_be64(pdu); - - if (likely(!bt)) - return; - - if (bio) - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); - else - __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); -} - -/** - * blk_add_trace_remap - Add a trace for a remap operation - * @q: queue the io is for - * @bio: the source bio - * @dev: target device - * @from: source sector - * @to: target sector - * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * - **/ -static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) -{ - struct blk_trace *bt = q->blk_trace; - struct blk_io_trace_remap r; - - if (likely(!bt)) - return; - - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); - - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -} - -/** - * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for - * @rq: io request - * @data: driver-specific data - * @len: length of driver-specific data - * - * Description: - * Some drivers might want to write driver-specific data per request. 
- * - **/ -static inline void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); - else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); -} - +extern void blk_add_driver_data(struct request_queue *q, struct request *rq, + void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); @@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q); #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) -#define blk_add_trace_rq(q, rq, what) do { } while (0) -#define blk_add_trace_bio(q, rq, what) do { } while (0) -#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) -#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) -#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) +#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY) -- cgit v1.2.3 From dcc7461eef7341e84e2f7274f904ce01a43b2506 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 26 Nov 2008 13:36:59 +0000 Subject: wusb: add debug files for ASL, PZL and DI to the whci-hcd driver Add asl, pzl and di debugfs files to uwb/uwbN/wusbhc for WHCI host controller. These dump the current ASL, PZL and DI buffer. 
Signed-off-by: David Vrabel --- include/linux/uwb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 1719709d60ca..d7ed5201ade6 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -394,6 +394,8 @@ struct uwb_rc { * @channel: channel being used by the PAL; 0 if the PAL isn't using * the radio; -1 if the PAL wishes to use the radio but * cannot. + * @debugfs_dir: a debugfs directory which the PAL can use for its own + * debugfs files. * * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). @@ -418,6 +420,7 @@ struct uwb_pal { void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv); int channel; + struct dentry *debugfs_dir; }; void uwb_pal_init(struct uwb_pal *pal); -- cgit v1.2.3 From ce71e27c6fdc43c29f36d307b9100bde70c947fc Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Tue, 19 Aug 2008 20:43:25 +0300 Subject: SLUB: Replace __builtin_return_address(0) with _RET_IP_. This patch replaces __builtin_return_address(0) with _RET_IP_, since a previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and they're widely available now. This makes for shorter and easier to read code. [penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer] Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slab.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 000da12b5cf0..c97ed28559ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, * request comes from. 
*/ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_track_caller(size_t, gfp_t, void*); +extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, __builtin_return_address(0)) + __kmalloc_track_caller(size, flags, _RET_IP_) #else #define kmalloc_track_caller(size, flags) \ __kmalloc(size, flags) @@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*); * allocation request comes from. */ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); +extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ - __builtin_return_address(0)) + _RET_IP_) #else #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node(size, flags, node) -- cgit v1.2.3 From e2f367f269fe19375f10e63efe0f2a6d3ddef8e6 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 21 Nov 2008 19:01:30 +0200 Subject: nl80211: Report max TX power in NL80211_BAND_ATTR_FREQS This is useful information to provide for userspace (e.g., hostapd needs this to generate Country IE). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 79827345351d..54d6ebe38e39 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,6 +508,7 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. 
*/ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -516,12 +517,15 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_PASSIVE_SCAN, NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, + NL80211_FREQUENCY_ATTR_MAX_TX_POWER, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 }; +#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER + /** * enum nl80211_bitrate_attr - bitrate attributes * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps -- cgit v1.2.3 From f80b5e99c7dac5a9a0d72496cec5075a12cd1476 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Fri, 21 Nov 2008 20:40:09 -0200 Subject: rfkill: preserve state across suspend The rfkill class API requires that the driver connected to a class call rfkill_force_state() on resume to update the real state of the rfkill controller, OR that it provides a get_state() hook. This means there is potentially a hidden call in the resume code flow that changes rfkill->state (i.e. rfkill_force_state()), so the previous state of the transmitter was being lost. The simplest and most future-proof way to fix this is to explicitly store the pre-sleep state on the rfkill structure, and restore from that on resume. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Cc: Alan Jenkins Signed-off-by: John W. 
Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4cd64b0d9825..f376a93927f7 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -108,6 +108,7 @@ struct rfkill { struct device dev; struct list_head node; + enum rfkill_state state_for_resume; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) -- cgit v1.2.3 From bf8c1ac6d81ba8c0e4dc2215f84f5e2a3c8227e8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Nov 2008 22:00:31 +0200 Subject: nl80211: Change max TX power to be in mBm instead of dBm In order to be consistent with NL80211_ATTR_POWER_RULE_MAX_EIRP, change NL80211_FREQUENCY_ATTR_MAX_TX_POWER to use mBm and U32 instead of dBm and U8. This is a userspace interface change, but the previous version had not yet been pushed upstream and there are no userspace programs using this yet, so there is justification to get this change in as long as it goes in before the previous version gets out. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 54d6ebe38e39..e08c8bcfb78d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,7 +508,8 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. - * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm + * (100 * dBm). 
*/ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, -- cgit v1.2.3 From d211af055d0c12dc3416c2886e6fbdc6eb74a381 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 24 Nov 2008 15:38:45 +0100 Subject: i386: get rid of the use of KPROBE_ENTRY / KPROBE_END entry_32.S is now the only user of KPROBE_ENTRY / KPROBE_END, treewide. This patch reorders entry_64.S and explicitly generates a separate section for functions that need the protection. The generated code before and after the patch is equal. The KPROBE_ENTRY and KPROBE_END macros are removed too. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- include/linux/linkage.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 9fd1f859021b..fee9e59649c1 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -64,14 +64,6 @@ name: #endif -#define KPROBE_ENTRY(name) \ - .pushsection .kprobes.text, "ax"; \ - ENTRY(name) - -#define KPROBE_END(name) \ - END(name); \ - .popsection - #ifndef END #define END(name) \ .size name, .-name -- cgit v1.2.3 From 8b752e3ef6e3f5cde87afc649dd51d92b1e549c1 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Fri, 28 Nov 2008 09:52:40 +0800 Subject: softirq: remove useless function __local_bh_enable Impact: remove unused code __local_bh_enable has been replaced with _local_bh_enable. The comment saying "it always nests inside local_bh_enable() sections" is no longer valid. Also there is no reason to use __local_bh_enable anywhere, so we can remove this useless function. 
Signed-off-by: Liming Wang Signed-off-by: Ingo Molnar --- include/linux/bottom_half.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 777dbf695d44..27b1bcffe408 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,7 +2,6 @@ #define _LINUX_BH_H extern void local_bh_disable(void); -extern void __local_bh_enable(void); extern void _local_bh_enable(void); extern void local_bh_enable(void); extern void local_bh_enable_ip(unsigned long ip); -- cgit v1.2.3 From a838c2ec6ea1f18431da74dfe4978c57355b95f3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 27 Nov 2008 16:14:44 +0800 Subject: markers: comment marker_synchronize_unregister() on data dependency Add document and comments on marker_synchronize_unregister(): it should be called before freeing resources that the probes depend on. Based on comments from Lai Jiangshan and Mathieu Desnoyers. Signed-off-by: Wu Fengguang Reviewed-by: Mathieu Desnoyers Reviewed-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/marker.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 34c14bc957f5..b85e74ca782f 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -211,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, /* * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the end of module exit to make sure there is no caller - * executing a probe when it is freed. + * unregistration and the first one of + * - the end of module exit function + * - the free of any resource used by the probes + * to ensure the code and data are valid for any possibly running probes. 
*/ #define marker_synchronize_unregister() synchronize_sched() -- cgit v1.2.3 From 0f0ca340e57bd7446855fefd07a64249acf81223 Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Fri, 28 Nov 2008 16:24:56 -0800 Subject: phy: power management support This patch adds power management support to the physical abstraction layer. The suspend and resume functions respectively turn on/off bit 11 in the PHY Basic mode control register. Generic PHY device starts supporting PM. In order to support wake-on-LAN and avoid powering down the PHY device, the MDIO is aware of what the Ethernet device wants to do. Deliberately, no CONFIG_PM defines were added to the sources. Also generic suspend/resume functions are exported to allow other drivers to use them (such as genphy_config_aneg etc.). Within the phy_driver_register function, we need to remove the memset. It overrides the device driver owner, which is not good. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 77c4ed60b982..d7e54d98869f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -467,6 +467,8 @@ int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); +int genphy_suspend(struct phy_device *phydev); +int genphy_resume(struct phy_device *phydev); void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, -- cgit v1.2.3 From 1f55ed06cf0c361b293b32e5947d35d173eff2aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 1 Dec 2008 19:14:02 +0100 Subject: fuse: update interface version Change interface version to 7.11 after adding the IOCTL and POLL messages. 
Also clean up the header a bit: - update copyright date to 2008 - fix checkpatch warning: WARNING: Use #include <linux/types.h> instead of <asm/types.h> - remove FUSE_MAJOR define, which is not being used any more Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 5650cf033e73..162e5defe683 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi + Copyright (C) 2001-2008 Miklos Szeredi This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -20,26 +20,27 @@ * * 7.10 * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * - add POLL message and NOTIFY_POLL notification */ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H -#include <asm/types.h> -#include <linux/major.h> +#include <linux/types.h> /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 10 +#define FUSE_KERNEL_MINOR_VERSION 11 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -/** The major number of the fuse character device */ -#define FUSE_MAJOR MISC_MAJOR - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ -- cgit v1.2.3 From 6c415b9234a8c71f290e5d4fddc467f103f32719 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Mon, 1 Dec 2008 20:49:05 +0530 Subject: sched: add uid information to sched_debug for CONFIG_USER_SCHED Impact: extend information in /proc/sched_debug This patch adds uid information in sched_debug for CONFIG_USER_SCHED Signed-off-by: Arun R Bharadwaj Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index
7a69c4d224ee..d8733f07d80b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2218,6 +2218,7 @@ extern void normalize_rt_tasks(void); extern struct task_group init_task_group; #ifdef CONFIG_USER_SCHED extern struct task_group root_task_group; +extern void set_tg_uid(struct user_struct *user); #endif extern struct task_group *sched_create_group(struct task_group *parent); -- cgit v1.2.3 From 8789a9e7df6bf9b93739c4c7d4e380725bc9e936 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 15:34:07 -0500 Subject: ring-buffer: read page interface Impact: new API to ring buffer This patch adds a new interface into the ring buffer that allows a page to be read from the ring buffer on a given CPU. For every page read, one must also be given to allow for a "swap" of the pages. rpage = ring_buffer_alloc_read_page(buffer); if (!rpage) goto err; ret = ring_buffer_read_page(buffer, &rpage, cpu, full); if (!ret) goto empty; process_page(rpage); ring_buffer_free_read_page(rpage); The caller of these functions must handle any waits that are needed to wait for new data. The ring_buffer_read_page will simply return 0 if there is no data, or if "full" is set and the writer is still on the current page. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3bb87a753fa3..1a350a847edd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -124,6 +124,11 @@ void tracing_on(void); void tracing_off(void); void tracing_off_permanent(void); +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +int ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; -- cgit v1.2.3 From 14a866c567e040ccf6240d68b083dd1dbbde63e6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:02 -0500 Subject: ftrace: add ftrace_graph_stop() Impact: new ftrace_graph_stop function While developing more features of function graph, I hit a bug that caused the WARN_ON to trigger in the prepare_ftrace_return function. Well, it was hard for me to find out that was happening because the bug would not print, it would just cause a hard lockup or reboot. The reason is that it is not safe to call printk from this function. Looking further, I also found that it calls unregister_ftrace_graph, which grabs a mutex and calls kstop machine. This would definitely lock the box up if it were to trigger. This patch adds a fast and safe ftrace_graph_stop() which will stop the function tracer. Then it is safe to call the WARN ON. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index afba918c623c..58ca1c3a3f4d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -376,6 +376,8 @@ typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); +extern void ftrace_graph_stop(void); + /* The current handlers in use */ extern trace_func_graph_ret_t ftrace_graph_return; extern trace_func_graph_ent_t ftrace_graph_entry; -- cgit v1.2.3 From e49dc19c6a19ea112fcb94b7c62ec62cdd5c08aa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:05 -0500 Subject: ftrace: function graph return for function entry Impact: feature, let entry function decide to trace or not This patch lets the graph tracer entry function decide if the tracing should be done at the end as well. This requires all function graph entry functions return 1 if it should trace, or 0 if the return should not be traced. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 58ca1c3a3f4d..469ceb3e85ba 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -371,7 +371,7 @@ struct ftrace_graph_ret { #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); -- cgit v1.2.3 From b908b53d580c3e9aba81ebe3339c5b7b4fa8031d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 1 Dec 2008 06:30:04 +0000 Subject: of/gpio: Implement of_get_gpio_flags() This adds a new function, of_get_gpio_flags, which is like of_get_gpio(), but accepts a new "flags" argument. This new function will be used by the drivers that need to retrieve additional GPIO information, such as active-low flag. Also, this changes the default ("simple") .xlate routine to warn about bogus (< 2) #gpio-cells usage: the second cell should always be present for GPIO flags. Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- include/linux/of_gpio.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 67db101d0eb8..e25abf610cb6 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -14,9 +14,22 @@ #ifndef __LINUX_OF_GPIO_H #define __LINUX_OF_GPIO_H +#include +#include #include #include +struct device_node; + +/* + * This is Linux-specific flags. 
By default controllers' and Linux' mapping + * match, but GPIO controllers are free to translate their own flags to + * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. + */ +enum of_gpio_flags { + OF_GPIO_ACTIVE_LOW = 0x1, +}; + #ifdef CONFIG_OF_GPIO /* @@ -26,7 +39,7 @@ struct of_gpio_chip { struct gpio_chip gc; int gpio_cells; int (*xlate)(struct of_gpio_chip *of_gc, struct device_node *np, - const void *gpio_spec); + const void *gpio_spec, enum of_gpio_flags *flags); }; static inline struct of_gpio_chip *to_of_gpio_chip(struct gpio_chip *gc) @@ -50,20 +63,37 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc) return container_of(of_gc, struct of_mm_gpio_chip, of_gc); } -extern int of_get_gpio(struct device_node *np, int index); +extern int of_get_gpio_flags(struct device_node *np, int index, + enum of_gpio_flags *flags); + extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); extern int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np, - const void *gpio_spec); + const void *gpio_spec, + enum of_gpio_flags *flags); #else /* Drivers may not strictly depend on the GPIO support, so let them link. */ -static inline int of_get_gpio(struct device_node *np, int index) +static inline int of_get_gpio_flags(struct device_node *np, int index, + enum of_gpio_flags *flags) { return -ENOSYS; } #endif /* CONFIG_OF_GPIO */ +/** + * of_get_gpio - Get a GPIO number to use with GPIO API + * @np: device node to get GPIO from + * @index: index of the GPIO + * + * Returns GPIO number to use with Linux generic GPIO API, or one of the errno + * value on the error condition. 
+ */ +static inline int of_get_gpio(struct device_node *np, int index) +{ + return of_get_gpio_flags(np, index, NULL); +} + #endif /* __LINUX_OF_GPIO_H */ -- cgit v1.2.3 From 8865c418caf4e9dd2c24bdfae3a5a4106e143e60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 3 Dec 2008 22:12:38 -0800 Subject: atm: 32-bit ioctl compatibility We lack compat ioctl support through most of the ATM code. This patch deals with most of it, and I can now at least use BR2684 and PPPoATM with 32-bit userspace. I haven't added a .compat_ioctl method to struct atm_ioctl, because AFAICT none of the current users need any conversion -- so we can just call the ->ioctl() method in every case. I looked at br2684, clip, lec, mpc, pppoatm and atmtcp. In svc_compat_ioctl() the only mangling which is needed is to change COMPAT_ATM_ADDPARTY to ATM_ADDPARTY. Although it's defined as _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) it doesn't actually _take_ a struct atm_iobuf as an argument -- it takes a struct sockaddr_atmsvc, which _is_ the same between 32-bit and 64-bit code, so doesn't need conversion. Almost all of vcc_ioctl() would have been identical, so I converted that into a core do_vcc_ioctl() function with an 'int compat' argument. I've done the same with atm_dev_ioctl(), where there _are_ a few differences, but still it's relatively contained and there would otherwise have been a lot of duplication. I haven't done any of the actual device-specific ioctls, although I've added a compat_ioctl method to struct atmdev_ops. Signed-off-by: David Woodhouse Signed-off-by: David S. 
Miller --- include/linux/atm.h | 17 ++++++++++++++--- include/linux/atmdev.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atm.h b/include/linux/atm.h index c791ddd96939..d3b292174aeb 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -231,10 +231,21 @@ static __inline__ int atmpvc_addr_in_use(struct sockaddr_atmpvc addr) */ struct atmif_sioc { - int number; - int length; - void __user *arg; + int number; + int length; + void __user *arg; }; +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT +#include +struct compat_atmif_sioc { + int number; + int length; + compat_uptr_t arg; +}; +#endif +#endif + typedef unsigned short atm_backend_t; #endif diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index a3d07c29d16c..086e5c362d3a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -100,6 +100,10 @@ struct atm_dev_stats { /* use backend to make new if */ #define ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) /* add party to p2mp call */ +#ifdef CONFIG_COMPAT +/* It actually takes struct sockaddr_atmsvc, not struct atm_iobuf */ +#define COMPAT_ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct compat_atm_iobuf) +#endif #define ATM_DROPPARTY _IOW('a', ATMIOC_SPECIAL+5,int) /* drop party from p2mp call */ @@ -224,6 +228,13 @@ struct atm_cirange { extern struct proc_dir_entry *atm_proc_root; #endif +#ifdef CONFIG_COMPAT +#include +struct compat_atm_iobuf { + int length; + compat_uptr_t buffer; +}; +#endif struct k_atm_aal_stats { #define __HANDLE_ITEM(i) atomic_t i @@ -379,6 +390,10 @@ struct atmdev_ops { /* only send is required */ int (*open)(struct atm_vcc *vcc); void (*close)(struct atm_vcc *vcc); int (*ioctl)(struct atm_dev *dev,unsigned int cmd,void __user *arg); +#ifdef CONFIG_COMPAT + int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, + void __user *arg); +#endif int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, void __user *optval,int 
optlen); int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, -- cgit v1.2.3 From ea4e2bc4d9f7370e57a343ccb5e7c0ad3222ec3c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:57 -0500 Subject: ftrace: graph of a single function This patch adds the file: /debugfs/tracing/set_graph_function which can be used along with the function graph tracer. When this file is empty, the function graph tracer will act as usual. When the file has a function in it, the function graph tracer will only trace that function. For example: # echo blk_unplug > /debugfs/tracing/set_graph_function # cat /debugfs/tracing/trace [...] ------------------------------------------ | 2) make-19003 => kjournald-2219 ------------------------------------------ 2) | blk_unplug() { 2) | dm_unplug_all() { 2) | dm_get_table() { 2) 1.381 us | _read_lock(); 2) 0.911 us | dm_table_get(); 2) 1. 76 us | _read_unlock(); 2) + 12.912 us | } 2) | dm_table_unplug_all() { 2) | blk_unplug() { 2) 0.778 us | generic_unplug_device(); 2) 2.409 us | } 2) 5.992 us | } 2) 0.813 us | dm_table_put(); 2) + 29. 90 us | } 2) + 34.532 us | } You can add up to 32 functions into this file. Currently we limit it to 32, but this may change with later improvements. To add another function, use the append '>>': # echo sys_read >> /debugfs/tracing/set_graph_function # cat /debugfs/tracing/set_graph_function blk_unplug sys_read Using the '>' will clear out the function and write anew: # echo sys_write > /debug/tracing/set_graph_function # cat /debug/tracing/set_graph_function sys_write Note, if you have function graph running while doing this, the small time between clearing it and updating it will cause the graph to record all functions. This should not be an issue because after it sets the filter, only those functions will be recorded from then on. If you need to only record a particular function then set this file first before starting the function graph tracer. 
In the future this side effect may be corrected. The set_graph_function file is similar to the set_ftrace_filter but it does not take wild cards nor does it allow for more than one function to be set with a single write. There is no technical reason why this is the case, I just do not have the time yet to implement that. Note, dynamic ftrace must be enabled for this to appear because it uses the dynamic ftrace records to match the name to the mcount call sites. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 ++++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 469ceb3e85ba..b295d3106bfe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -391,4 +392,49 @@ static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif +#ifdef CONFIG_TRACING +#include + +/* flags for current->trace */ +enum { + TSK_TRACE_FL_TRACE_BIT = 0, + TSK_TRACE_FL_GRAPH_BIT = 1, +}; +enum { + TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, + TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, +}; + +static inline void set_tsk_trace_trace(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_trace(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline int test_tsk_trace_trace(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_TRACE; +} + +static inline void set_tsk_trace_graph(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_graph(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline int 
test_tsk_trace_graph(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_GRAPH; +} + +#endif /* CONFIG_TRACING */ + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0a93c31228..4c152e0acc9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1380,6 +1380,10 @@ struct task_struct { */ atomic_t trace_overrun; #endif +#ifdef CONFIG_TRACING + /* state flags for use by tracers */ + unsigned long trace; +#endif }; /* -- cgit v1.2.3 From 5ef6476190d24419a9a537baa0b5641845136989 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 4 Dec 2008 00:26:39 -0500 Subject: pid: fix the do_each_pid_task() macro Impact: macro side-effects fix This patch adds parenthesis around 'pid' in the do_each_pid_task macro to allow callers to pass in more complex parameters. e.g. do_each_pid_task(*pid, type, task) Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/pid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index d7e98ff8021e..bb206c56d1f0 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -147,9 +147,9 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ struct hlist_node *pos___; \ - if (pid != NULL) \ + if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), pos___, \ - &pid->tasks[type], pids[type].node) { + &(pid)->tasks[type], pids[type].node) { /* * Both old and new leaders may be attached to -- cgit v1.2.3 From 00ef9f7348dfd2fc223ec42aceb30836e86b367f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Dec 2008 09:00:17 +0100 Subject: lockdep: change a held lock's class Impact: introduce new lockdep API Allow to change a held lock's class. Basically the same as the existing code to change a subclass therefore reuse all that. The XFS code will be able to use this to annotate their inode locking. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8956daf64abd..37a0361f4685 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -314,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); -extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, - unsigned long ip); +extern void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip); + +static inline void lock_set_subclass(struct lockdep_map *lock, + unsigned int subclass, unsigned long ip) +{ + lock_set_class(lock, lock->name, lock->key, subclass, ip); +} # define INIT_LOCKDEP .lockdep_recursion = 0, @@ -333,6 +340,7 @@ static inline void lockdep_on(void) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) +# define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -- cgit v1.2.3 From c9bb6003dd096ad190e1594a7d835ae1c39fae8f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Oct 2008 23:46:09 -0700 Subject: of: Fix comment, sparc no longer uses of_device objects on special busses. It only uses of_platform_bus_type. Signed-off-by: David S. 
Miller --- include/linux/of_platform.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index a8efcfeea732..3d327b67d7e2 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -26,8 +26,7 @@ extern struct bus_type of_platform_bus_type; /* * An of_platform_driver driver is attached to a basic of_device on - * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS - * busses on sparc). + * the "platform bus" (of_platform_bus_type). */ struct of_platform_driver { -- cgit v1.2.3 From 21a8c466f99063eeb8567318b4e305eda9015408 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:51:23 +0100 Subject: tracing/ftrace: provide the macro task_curr_ret_stack() Impact: cleanup As suggested by Steven Rostedt, this patch provides a new macro task_curr_ret_stack() to move the cpp CONFIG conditional into the linux/ftrace.h header. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b295d3106bfe..b9b4d0a22d10 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -387,9 +388,19 @@ extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); + +static inline int task_curr_ret_stack(struct task_struct *t) +{ + return t->curr_ret_stack; +} #else static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } + +static inline int task_curr_ret_stack(struct task_struct *tsk) +{ + return -1; +} #endif #ifdef CONFIG_TRACING -- cgit v1.2.3 From
72bdcf34380917260da41e3c49e10edee04bc5cd Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 26 Nov 2008 16:15:24 +0200 Subject: nl80211: Add frequency configuration (including HT40) This patch adds new NL80211_CMD_SET_WIPHY attributes NL80211_ATTR_WIPHY_FREQ and NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET to allow userspace to set the operating channel (e.g., hostapd for AP mode). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e08c8bcfb78d..92f79d2bdd8c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -26,8 +26,9 @@ * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or - * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME - * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or + * %NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. 
@@ -180,6 +181,14 @@ enum nl80211_commands { * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz + * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): + * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * this attribute) + * NL80211_SEC_CHAN_DISABLED = HT20 only + * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel + * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -315,6 +324,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, NL80211_ATTR_WIPHY_TXQ_PARAMS, + NL80211_ATTR_WIPHY_FREQ, + NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, /* add attributes here, update the policy in nl80211.c */ @@ -329,6 +340,8 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS +#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ +#define NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -742,4 +755,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; +enum nl80211_sec_chan_offset { + NL80211_SEC_CHAN_NO_HT /* No HT */, + NL80211_SEC_CHAN_DISABLED /* HT20 only */, + NL80211_SEC_CHAN_BELOW /* HT40- */, + NL80211_SEC_CHAN_ABOVE /* HT40+ */ +}; #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 10ec4f1d0851eb97cd53db66150835dd7f64829d Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Wed, 26 Nov 2008 13:03:08 -0800 Subject: nl80211: relicense nl80211.h under the ISC We have a few BSD/ISC licensed userspace applications which include nl80211.h from the kernel. To avoid legal ambiguity for usage of the header file in these projects we rather simply relicense the header file under the ISC. We've received consent from all contributors to it. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Acked-by: Michael Wu Acked-by: Luis Carlos Cobo Acked-by: Michael Buesch Acked-by: Jouni Malinen Acked-by: Colin McCabe Acked-by: Javier Cardona Cc: johannes@sipsolutions.net Cc: altape@eden.rutgers.edu Cc: luisca@cozybit.com Cc: mb@bu3sch.de Cc: jouni.malinen@atheros.com Cc: colin@cozybit.com Cc: javier@cozybit.com Signed-off-by: John W. Linville --- include/linux/nl80211.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 92f79d2bdd8c..04d4516f9c71 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -3,7 +3,26 @@ /* * 802.11 netlink interface public header * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006, 2007, 2008 Johannes Berg + * Copyright 2008 Michael Wu + * Copyright 2008 Luis Carlos Cobo + * Copyright 2008 Michael Buesch + * Copyright 2008 Luis R. Rodriguez + * Copyright 2008 Jouni Malinen + * Copyright 2008 Colin McCabe + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * */ /** -- cgit v1.2.3 From e088e4c9cdb618675874becb91b2fd581ee707e6 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 25 Nov 2008 13:29:47 -0500 Subject: [CPUFREQ] Disable sysfs ui for p4-clockmod. p4-clockmod has a long history of abuse. It pretends to be a CPU frequency scaling driver, even though it doesn't actually change the CPU frequency, but instead just modulates the frequency with wait-states. The biggest misconception is that when running at the lower 'frequency' p4-clockmod is saving power. This isn't the case, as workloads running slower take longer to complete, preventing the CPU from entering deep C states. However p4-clockmod does have a purpose. It can prevent overheating. Having it hooked up to the cpufreq interfaces is the wrong way to achieve cooling however. It should instead be hooked up to ACPI. This diff introduces a means for a cpufreq driver to register with the cpufreq core, but not present a sysfs interface. 
Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ee608fd7b77..484b3abf61bb 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -234,6 +234,7 @@ struct cpufreq_driver { int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; + bool hide_interface; }; /* flags */ -- cgit v1.2.3 From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 8 Dec 2008 01:14:16 -0800 Subject: netdevice: Kill netdev->priv This is the last patch of this series. After removing all direct references to netdev->priv, I am killing "priv" of "struct net_device" and fixing the related comments/docs. Nobody will be allowed to reference netdev->priv directly any more. If you want to reference the memory of private data, use netdev_priv() instead. If the private data is not allocated by alloc_netdev(), use netdev->ml_priv to point to that memory after creating the private data. Signed-off-by: Wang Chen Signed-off-by: David S.
Miller --- include/linux/hdlc.h | 2 +- include/linux/netdevice.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e960faac609d..fd47a151665e 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -43,7 +43,7 @@ struct hdlc_proto { }; -/* Pointed to by dev->priv */ +/* Pointed to by netdev_priv(dev) */ typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0df0db068ac3..47e731528315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,7 +785,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - void *priv; /* pointer to private data */ /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ -- cgit v1.2.3 From 0049bab5e765aa74cf767a834fa336e19453fc5e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:05 -0800 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs. Also removed are the constructors for the TX CCID and the RX CCID, since the switch "rx <-> non-rx" is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff52a8a4..46daea312d92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; -- cgit v1.2.3 From 4098dce5be537a157eed4a326efd464109825b8b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:37 -0800 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. 
At startup the initialisation of the NDP count feature uses the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea312d92..60e94438eadd 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From 6fdd34d43bff8be9bb925b49d87a0ee144d2ab07 Mon Sep 17 00:00:00 2001 From: 
Gerrit Renker Date: Mon, 8 Dec 2008 01:19:06 -0800 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, as it now is handled fully dynamically via feature negotiation (i.e. when CCID-2 is enabled, Ack Vectors are automatically enabled as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. There was a FIXME to change the error code when dccp_ackvec_add() fails. I removed this after finding out that: * the check whether ackno < ISN is already made earlier, * this Response is likely the 1st packet with an Ackno that the client gets, * so when dccp_ackvec_add() fails, the reason is likely not a packet error. 
Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e94438eadd..61734e27abb7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From 361b73d5c34f59c3fd107bb9dbe7a1fbff2c2517 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 8 Dec 2008 10:58:08 +0800 Subject: ring_buffer: fix comments Impact: comments cleanup fix incorrect comments for enum ring_buffer_type Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1a350a847edd..d363467c8f13 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -28,17 +28,19 @@ struct ring_buffer_event { * size = 8 bytes * * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock - * array[0] = tv_nsec - * array[1] = tv_sec + * array[0] = 
tv_nsec + * array[1..2] = tv_sec * size = 16 bytes * * @RINGBUF_TYPE_DATA: Data record * If len is zero: * array[0] holds the actual length - * array[1..(length+3)/4-1] holds data + * array[1..(length+3)/4] holds data + * size = 4 + 4 + length (bytes) * else * length = len << 2 - * array[0..(length+3)/4] holds data + * array[0..(length+3)/4-1] holds data + * size = 4 + length (bytes) */ enum ring_buffer_type { RINGBUF_TYPE_PADDING, -- cgit v1.2.3 From 8b96f0119818964e4944fd1c423bf6770027d3ac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:40:00 +0100 Subject: tracing/function-graph-tracer: introduce __notrace_funcgraph to filter special functions Impact: trace more functions When the function graph tracer is configured, three more files are not traced to prevent only four functions to be traced. And this impacts the normal function tracer too. arch/x86/kernel/process_64/32.c: I had crashes when I let this file traced. After some debugging, I saw that the "current" task point was changed inside__swtich_to(), ie: "write_pda(pcurrent, next_p);" inside process_64.c Since the tracer store the original return address of the function inside current, we had crashes. Only __switch_to() has to be excluded from tracing. kernel/module.c and kernel/extable.c: Because of a function used internally by the function graph tracer: __kernel_text_address() To let the other functions inside these files to be traced, this patch introduces the __notrace_funcgraph function prefix which is __notrace if function graph tracer is configured and nothing if not. 
Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b9b4d0a22d10..449fa8e9e34f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -369,6 +369,14 @@ struct ftrace_graph_ret { }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * Sometimes we don't want to trace a function with the function + * graph tracer but we want them to keep traced by the usual function + * tracer if the function graph tracer is not configured. + */ +#define __notrace_funcgraph notrace + #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ @@ -394,6 +402,9 @@ static inline int task_curr_ret_stack(struct task_struct *t) return t->curr_ret_stack; } #else + +#define __notrace_funcgraph + static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } -- cgit v1.2.3 From 380c4b1411ccd6885f92b2c8ceb08433a720f44e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:43:41 +0100 Subject: tracing/function-graph-tracer: append the tracing_graph_flag Impact: Provide a way to pause the function graph tracer As suggested by Steven Rostedt, the previous patch that prevented from spinlock function tracing shouldn't use the raw_spinlock to fix it. It's much better to follow lockdep with normal spinlock, so this patch adds a new flag for each task to make the function graph tracer able to be paused. We also can send an ftrace_printk without worrying about the irrelevant traced spinlock during insertion.
Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 13 +++++++++++++ include/linux/sched.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 449fa8e9e34f..11cac81eed08 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -401,6 +401,16 @@ static inline int task_curr_ret_stack(struct task_struct *t) { return t->curr_ret_stack; } + +static inline void pause_graph_tracing(void) +{ + atomic_inc(&current->tracing_graph_pause); +} + +static inline void unpause_graph_tracing(void) +{ + atomic_dec(&current->tracing_graph_pause); +} #else #define __notrace_funcgraph @@ -412,6 +422,9 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; } + +static inline void pause_graph_tracing(void) { } +static inline void unpause_graph_tracing(void) { } #endif #ifdef CONFIG_TRACING diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c152e0acc9e..4b81fc5f7731 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1379,6 +1379,8 @@ struct task_struct { * because of depth overrun. */ atomic_t trace_overrun; + /* Pause for the tracing */ + atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* state flags for use by tracers */ -- cgit v1.2.3 From 1e641743f055f075ed9a4edd75f1fb1e05669ddc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 9 Dec 2008 09:23:33 +0000 Subject: Audit: Log TIOCSTI AUDIT_TTY records currently log all data read by processes marked for TTY input auditing, even if the data was "pushed back" using the TIOCSTI ioctl, not typed by the user. This patch records all TIOCSTI calls to disambiguate the input. It generates one audit message per character pushed back; considering TIOCSTI is used very rarely, this simple solution is probably good enough. (The only program I could find that uses TIOCSTI is mailx/nail in "header editing" mode, e.g. using the ~h escape.
mailx is used very rarely, and the escapes are used even rarer.) Signed-Off-By: Miloslav Trmac Signed-off-by: Al Viro Signed-off-by: James Morris --- include/linux/tty.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 3b8121d4e36f..580700f20a1c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -442,6 +442,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); +extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid); @@ -450,6 +451,9 @@ static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) { } +static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) +{ +} static inline void tty_audit_exit(void) { } -- cgit v1.2.3 From 58494487581cb143a0d763e3056a894d5009d60a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 26 Nov 2008 12:02:53 +0100 Subject: oprofile: update comment for oprofile_add_sample() The cpu argument is no longer part of the parameter list. Signed-off-by: Robert Richter --- include/linux/oprofile.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5231861f357d..1ce9fe572e51 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -86,8 +86,7 @@ int oprofile_arch_init(struct oprofile_operations * ops); void oprofile_arch_exit(void); /** - * Add a sample. This may be called from any context. Pass - * smp_processor_id() as cpu. + * Add a sample. This may be called from any context. 
*/ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); -- cgit v1.2.3 From e09373f22e76cc048ca5fe10a9ff9012f5d64309 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 26 Nov 2008 14:04:19 +0100 Subject: ring_buffer: add remaining cpu functions to ring_buffer.h These functions are not yet in ring_buffer.h though they seems to be part of the API. Cc: Steven Rostedt Signed-off-by: Robert Richter --- include/linux/ring_buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e097c2e6b6dc..de9d8c12e5ec 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -116,6 +116,8 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); +unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); -- cgit v1.2.3 From 69423d99fc182a81f3c5db3eb5c140acc6fc64be Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 10 Dec 2008 13:37:21 +0000 Subject: [MTD] update internal API to support 64-bit device size MTD internal API presently uses 32-bit values to represent device size. This patch updates them to 64-bits but leaves the external API unchanged. Extending the external API is a separate issue for several reasons. First, no one needs it at the moment. Secondly, whether the implementation is done with IOCTLs, sysfs or both is still debated. Thirdly external API changes require the internal API to be accepted first. Note that although the MTD API will be able to support 64-bit device sizes, existing drivers do not and are not required to do so, although NAND base has been updated. 
In general, changing from 32-bit to 64-bit values cause little or no changes to the majority of the code with the following exceptions: - printk message formats - division and modulus of 64-bit values - NAND base support - 32-bit local variables used by mtdpart and mtdconcat - naughtily assuming one structure maps to another in MEMERASE ioctl Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 57 ++++++++++++++++++++++++++++++++++++------ include/linux/mtd/nand.h | 2 +- include/linux/mtd/partitions.h | 4 +-- 3 files changed, 52 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430a..95e585ecc297 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -15,6 +15,8 @@ #include #include +#include + #define MTD_CHAR_MAJOR 90 #define MTD_BLOCK_MAJOR 31 #define MAX_MTD_DEVICES 32 @@ -25,16 +27,16 @@ #define MTD_ERASE_DONE 0x08 #define MTD_ERASE_FAILED 0x10 -#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff +#define MTD_FAIL_ADDR_UNKNOWN -1LL /* If the erase fails, fail_addr might indicate exactly which block failed. If fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not specific to any particular block. 
*/ struct erase_info { struct mtd_info *mtd; - u_int32_t addr; - u_int32_t len; - u_int32_t fail_addr; + uint64_t addr; + uint64_t len; + uint64_t fail_addr; u_long time; u_long retries; u_int dev; @@ -46,7 +48,7 @@ struct erase_info { }; struct mtd_erase_region_info { - u_int32_t offset; /* At which this region starts, from the beginning of the MTD */ + uint64_t offset; /* At which this region starts, from the beginning of the MTD */ u_int32_t erasesize; /* For this region */ u_int32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ @@ -101,7 +103,7 @@ struct mtd_oob_ops { struct mtd_info { u_char type; u_int32_t flags; - u_int32_t size; // Total size of the MTD + uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed @@ -120,6 +122,16 @@ struct mtd_info { u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) u_int32_t oobavail; // Available OOB bytes per block + /* + * If erasesize is a power of 2 then the shift is stored in + * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize. + */ + unsigned int erasesize_shift; + unsigned int writesize_shift; + /* Masks based on erasesize_shift and writesize_shift */ + unsigned int erasesize_mask; + unsigned int writesize_mask; + // Kernel-only stuff starts here.
const char *name; int index; @@ -190,8 +202,8 @@ struct mtd_info { void (*sync) (struct mtd_info *mtd); /* Chip-supported device locking */ - int (*lock) (struct mtd_info *mtd, loff_t ofs, size_t len); - int (*unlock) (struct mtd_info *mtd, loff_t ofs, size_t len); + int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); /* Power Management functions */ int (*suspend) (struct mtd_info *mtd); @@ -221,6 +233,35 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; +static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz >> mtd->erasesize_shift; + do_div(sz, mtd->erasesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->erasesize_shift) + return sz & mtd->erasesize_mask; + return do_div(sz, mtd->erasesize); +} + +static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz >> mtd->writesize_shift; + do_div(sz, mtd->writesize); + return sz; +} + +static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz & mtd->writesize_mask; + return do_div(sz, mtd->writesize); +} /* Kernel-side ioctl definitions */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 733d3f3b4eb8..c0677b8082be 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -399,7 +399,7 @@ struct nand_chip { int bbt_erase_shift; int chip_shift; int numchips; - unsigned long chipsize; + uint64_t chipsize; int pagemask; int pagebuf; int subpagesize; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index c92b4d439609..164c7d78687d 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -36,8 +36,8 @@ struct mtd_partition { char *name; /* identifier string */ - u_int32_t size; /* partition size */ - u_int32_t 
offset; /* offset within the master MTD space */ + uint64_t size; /* partition size */ + uint64_t offset; /* offset within the master MTD space */ u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ -- cgit v1.2.3 From 3854be7712f7b4bdcaed14664fc7c7124b3fef0d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:06:42 +0000 Subject: [MTD] Remove strange u_int32_t types from FTL Signed-off-by: David Woodhouse --- include/linux/mtd/ftl.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h index 0be442f881dd..0555f7a0b9ed 100644 --- a/include/linux/mtd/ftl.h +++ b/include/linux/mtd/ftl.h @@ -32,25 +32,25 @@ #define _LINUX_FTL_H typedef struct erase_unit_header_t { - u_int8_t LinkTargetTuple[5]; - u_int8_t DataOrgTuple[10]; - u_int8_t NumTransferUnits; - u_int32_t EraseCount; - u_int16_t LogicalEUN; - u_int8_t BlockSize; - u_int8_t EraseUnitSize; - u_int16_t FirstPhysicalEUN; - u_int16_t NumEraseUnits; - u_int32_t FormattedSize; - u_int32_t FirstVMAddress; - u_int16_t NumVMPages; - u_int8_t Flags; - u_int8_t Code; - u_int32_t SerialNumber; - u_int32_t AltEUHOffset; - u_int32_t BAMOffset; - u_int8_t Reserved[12]; - u_int8_t EndTuple[2]; + uint8_t LinkTargetTuple[5]; + uint8_t DataOrgTuple[10]; + uint8_t NumTransferUnits; + uint32_t EraseCount; + uint16_t LogicalEUN; + uint8_t BlockSize; + uint8_t EraseUnitSize; + uint16_t FirstPhysicalEUN; + uint16_t NumEraseUnits; + uint32_t FormattedSize; + uint32_t FirstVMAddress; + uint16_t NumVMPages; + uint8_t Flags; + uint8_t Code; + uint32_t SerialNumber; + uint32_t AltEUHOffset; + uint32_t BAMOffset; + uint8_t Reserved[12]; + uint8_t EndTuple[2]; } erase_unit_header_t; /* Flags in erase_unit_header_t */ -- 
cgit v1.2.3 From 26cdb67c74aedc22367e6d0271f7f955220cca65 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 10 Dec 2008 14:08:12 +0000 Subject: [MTD] Remove more strange u_intxx_t types Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 26 +++++++++++++------------- include/linux/mtd/partitions.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 95e585ecc297..adef674855f3 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -39,8 +39,8 @@ struct erase_info { uint64_t fail_addr; u_long time; u_long retries; - u_int dev; - u_int cell; + unsigned dev; + unsigned cell; void (*callback) (struct erase_info *self); u_long priv; u_char state; @@ -49,8 +49,8 @@ struct erase_info { struct mtd_erase_region_info { uint64_t offset; /* At which this region starts, from the beginning of the MTD */ - u_int32_t erasesize; /* For this region */ - u_int32_t numblocks; /* Number of blocks of erasesize in this region */ + uint32_t erasesize; /* For this region */ + uint32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ }; @@ -102,14 +102,14 @@ struct mtd_oob_ops { struct mtd_info { u_char type; - u_int32_t flags; + uint32_t flags; uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed * information below if they desire */ - u_int32_t erasesize; + uint32_t erasesize; /* Minimal writable flash unit size. In case of NOR flash it is 1 (even * though individual bits can be cleared), in case of NAND flash it is * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR @@ -117,10 +117,10 @@ struct mtd_info { * Any driver registering a struct mtd_info must ensure a writesize of * 1 or larger.
*/ - u_int32_t writesize; + uint32_t writesize; - u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) - u_int32_t oobavail; // Available OOB bytes per block + uint32_t oobsize; // Amount of OOB data per block (e.g. 16) + uint32_t oobavail; // Available OOB bytes per block /* * If erasesize is a power of 2 then the shift is stored in @@ -233,7 +233,7 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; -static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz >> mtd->erasesize_shift; @@ -241,14 +241,14 @@ static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) { if (mtd->erasesize_shift) return sz & mtd->erasesize_mask; return do_div(sz, mtd->erasesize); } -static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz >> mtd->writesize_shift; @@ -256,7 +256,7 @@ static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) return sz; } -static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) return sz & mtd->writesize_mask; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 164c7d78687d..a45dd831b3f8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -38,7 +38,7 @@ struct mtd_partition { char *name; /* identifier string */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ - u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ + uint32_t mask_flags; /* master MTD flags 
to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ struct mtd_info **mtdp; /* pointer to store the MTD object */ }; -- cgit v1.2.3 From cdc693643271b2e6a693cf8f6afb258cce01f058 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 10 Dec 2008 13:55:49 +0000 Subject: ALSA: Add support for mechanical jack insertion Some systems support both mechanical and electrical jack detection, allowing them to report that a jack is physically present but does not have any functioning connections. Add a new jack type for these, allowing user space to report faulty connections. Thanks to Guillem Jover for the suggestion. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 7323d2ff5151..abd223b0f586 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -645,6 +645,7 @@ struct input_absinfo { #define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ #define SW_DOCK 0x05 /* set = plugged into dock */ #define SW_LINEOUT_INSERT 0x06 /* set = inserted */ +#define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) -- cgit v1.2.3 From d3af0f048c114dd53713d5920c54f6d5b6b12139 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Dec 2008 14:23:38 -0800 Subject: [MTD] [NAND] remove excess kernel-doc notation Delete extra kernel-doc notation for struct fields and function parameters that don't exist: Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'wq' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'datbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobbuf' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess 
struct/union/enum/typedef member 'oobdirty' description in 'nand_chip' Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'data_poi' description in 'nand_chip' Warning(drivers/mtd/nand/nand_base.c:2527): Excess function parameter 'maxchips' description in 'nand_scan_tail' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c0677b8082be..db5b63da2a7e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -335,17 +335,12 @@ struct nand_buffers { * @erase_cmd: [INTERN] erase command write function, selectable due to AND support * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR) - * @wq: [INTERN] wait queue to sleep on if a NAND operation is in progress * @state: [INTERN] the current state of the NAND device * @oob_poi: poison value buffer * @page_shift: [INTERN] number of address bits in a page (column address bits) * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry * @chip_shift: [INTERN] number of address bits in one chip - * @datbuf: [INTERN] internal buffer for one page + oob - * @oobbuf: [INTERN] oob buffer for one eraseblock - * @oobdirty: [INTERN] indicates that oob_buf must be reinitialized - * @data_poi: [INTERN] pointer to a data buffer * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about * special functionality. 
See the defines for further explanation * @badblockpos: [INTERN] position of the bad block marker in the oob area -- cgit v1.2.3 From 2107fb8b5bf018be691afdd4c6ffaecf0c3307be Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:38 +0000 Subject: smsc911x: add dynamic bus configuration Convert the driver to select 16-bit or 32-bit bus access at runtime, at a small performance cost. Signed-off-by: Steve Glendinning Acked-by: Catalin Marinas Signed-off-by: David S. Miller --- include/linux/smsc911x.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 47c4ffd10dbb..1cbf0313adde 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -28,6 +28,7 @@ struct smsc911x_platform_config { unsigned int irq_polarity; unsigned int irq_type; + unsigned int flags; phy_interface_t phy_interface; }; @@ -39,4 +40,8 @@ struct smsc911x_platform_config { #define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 #define SMSC911X_IRQ_TYPE_PUSH_PULL 1 +/* Constants for flags */ +#define SMSC911X_USE_16BIT (BIT(0)) +#define SMSC911X_USE_32BIT (BIT(1)) + #endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.2.3 From bd91b8bf372911c1e4d66d6bb44fe409349a6791 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:07:08 -0800 Subject: netns: ip6mr: allocate mroute6_socket per-namespace. Preliminary work to make IPv6 multicast forwarding netns-aware. Make IPv6 multicast forwarding mroute6_socket per-namespace, moves it into struct netns_ipv6. At the moment, mroute6_socket is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. 
Miller --- include/linux/mroute6.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 6f4c180179e2..2cd9901ee5c7 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -117,6 +117,7 @@ struct sioc_mif_req6 #include #include /* for struct sk_buff_head */ +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -232,10 +233,13 @@ struct rtmsg; extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE -extern struct sock *mroute6_socket; +static inline struct sock *mroute6_socket(struct net *net) +{ + return net->ipv6.mroute6_sk; +} extern int ip6mr_sk_done(struct sock *sk); #else -#define mroute6_socket NULL +static inline struct sock *mroute6_socket(struct net *net) { return NULL; } static inline int ip6mr_sk_done(struct sock *sk) { return 0; } #endif #endif -- cgit v1.2.3 From 58701ad41105638baa0b38ffe9ac5b10469c1fd3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:22:34 -0800 Subject: netns: ip6mr: store netns in struct mfc6_cache This patch stores into struct mfc6_cache the network namespace each mfc6_cache belongs to. The new member is mfc6_net. mfc6_net is assigned at cache allocation and doesn't change during the rest of the cache entry life. This will help to retrieve the current netns around the IPv6 multicast forwarding code. At the moment, all mfc6_cache are allocated in init_net. Changelog: ========== * Use write_pnet()/read_pnet() to set and get mfc6_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. 
Miller --- include/linux/mroute6.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 2cd9901ee5c7..15d85fe12bbd 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -188,6 +188,9 @@ struct mif_device struct mfc6_cache { struct mfc6_cache *next; /* Next entry on cache line */ +#ifdef CONFIG_NET_NS + struct net *mfc6_net; +#endif struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ struct in6_addr mf6c_origin; /* Source of packet */ mifi_t mf6c_parent; /* Source interface */ @@ -210,6 +213,18 @@ struct mfc6_cache } mfc_un; }; +static inline +struct net *mfc6_net(const struct mfc6_cache *mfc) +{ + return read_pnet(&mfc->mfc6_net); +} + +static inline +void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) +{ + write_pnet(&mfc->mfc6_net, hold_net(net)); +} + #define MFC_STATIC 1 #define MFC_NOTIFY 2 -- cgit v1.2.3 From 8229efdaef1e7913ae1712c0ba752f267e5fcd5e Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:30:15 -0800 Subject: netns: ip6mr: enable namespace support in ipv6 multicast forwarding code This last patch makes the appropriate changes to use and propagate the network namespace where needed in IPv6 multicast forwarding code. This consists mainly in replacing all the remaining init_net occurrences with current netns pointer retrieved from sockets, net devices or mfc6_caches depending on the routines' contexts. Some routines receive a new 'struct net' parameter to propagate the current netns: * ip6mr_get_route * ip6mr_cache_report * ip6mr_cache_find * ip6mr_cache_unresolved * mif6_add/mif6_delete * ip6mr_mfc_add/ip6mr_mfc_delete * ip6mr_reg_vif All the IPv6 multicast forwarding variables moved to struct netns_ipv6 by the previous patches are now referenced in the correct namespace. Changelog: ========== * Take into account the net associated to mfc6_cache when matching entries in mfc_unres_queue list.
* Call mroute_clean_tables() in ip6mr_net_exit() to free memory allocated per-namespace. * Call dev_net_set() in ip6mr_reg_vif() to initialize dev->nd_net correctly. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 15d85fe12bbd..5375faca1f72 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -245,7 +245,8 @@ void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) #ifdef __KERNEL__ struct rtmsg; -extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); +extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, + struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE static inline struct sock *mroute6_socket(struct net *net) -- cgit v1.2.3 From 4d4be482a4d78ca906f45e99fd9fdb91e907f5ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:33 -0500 Subject: [XFS] add a FMODE flag to make XFS invisible I/O less hacky XFS has a mode called invisible I/O that doesn't update any of the timestamps. It's used for HSM-style applications and exposed through the nasty open by handle ioctl. Instead of doing direct assignment of file operations that set an internal flag for it add a new FMODE_NOCMTIME flag that we can check in the normal file operations. (addition of the generic VFS flag has been ACKed by Al as an interim solution) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- include/linux/fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51bd9370d437..965b9ba3865d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -81,6 +81,14 @@ extern int dir_notify_enable; #define FMODE_WRITE_IOCTL ((__force fmode_t)128) #define FMODE_NDELAY_NOW ((__force fmode_t)256) +/* + * Don't update ctime and mtime.
+ * + * Currently a special hack for the XFS open_by_handle ioctl, but we'll + * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. + */ +#define FMODE_NOCMTIME ((__force fmode_t)2048) + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3 From c2724775ce57c98b8af9694857b941dc61056516 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Thu, 11 Dec 2008 13:49:59 +0100 Subject: x86, bts: provide in-kernel branch-trace interface Impact: cleanup Move the BTS bits from ptrace.c into ds.c. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b81fc5f7731..dc5ea65dc716 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1176,6 +1176,7 @@ struct task_struct { * The buffer to hold the BTS data. */ void *bts_buffer; + size_t bts_size; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ -- cgit v1.2.3 From bcbc4f20b52c2c40c43a4d2337707dcdfe81bc3a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 9 Dec 2008 23:54:20 +0100 Subject: tracing/function-graph-tracer: annotate do_IRQ and smp_apic_timer_interrupt Impact: move most important x86 irq entry-points to a separate subsection Annotate do_IRQ and smp_apic_timer_interrupt to put them into the .irqentry.text subsection. These function will so be recognized as hardirq entrypoints for the function-graph-tracer. We could also annotate other irq entries but the others are far less important but they can be added on request. 
Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 11cac81eed08..44020f31bd81 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -377,6 +377,16 @@ struct ftrace_graph_ret { */ #define __notrace_funcgraph notrace +/* + * We want to which function is an entrypoint of a hardirq. + * That will help us to put a signal on output. + */ +#define __irq_entry __attribute__((__section__(".irqentry.text"))) + +/* Limits of hardirq entrypoints */ +extern char __irqentry_text_start[]; +extern char __irqentry_text_end[]; + #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ @@ -414,6 +424,7 @@ static inline void unpause_graph_tracing(void) #else #define __notrace_funcgraph +#define __irq_entry static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } -- cgit v1.2.3 From ee79d1bdb6a10499e53f80b1e8d14110215178ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Dec 2008 18:49:50 +0100 Subject: sched: let arch_update_cpu_topology indicate if topology changed Change arch_update_cpu_topology so it returns 1 if the cpu topology changed and 0 if it didn't change. This will be useful for the next patch which adds a call to this function in partition_sched_domains. 
Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 117f1b7405cf..0c5b5ac36d8e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -49,7 +49,7 @@ for_each_online_node(node) \ if (nr_cpus_node(node)) -void arch_update_cpu_topology(void); +int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 -- cgit v1.2.3 From 5b37717a23b8e40f6cf7ad85a26ddcf41c171e2c Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Fri, 12 Dec 2008 13:00:06 +0000 Subject: uwb: improved MAS allocator and reservation conflict handling Greatly enhance the MAS allocator: - Handle row and column reservations. - Permit all the available MAS to be allocated. - Follows the WiMedia rules on MAS selection. Take appropriate action when reservation conflicts are detected. - Correctly identify which reservation wins the conflict. - Protect alien BP reservations. - If an owned reservation loses, resize/move it. - Follow the backoff procedure before requesting additional MAS. When reservations are terminated, move the remaining reservations (if necessary) so they keep following the MAS allocation rules. 
Signed-off-by: Stefano Panella Signed-off-by: David Vrabel --- include/linux/uwb.h | 47 +++++++++++++++++++++++++++++++++++++++---- include/linux/uwb/debug-cmd.h | 2 +- include/linux/uwb/spec.h | 25 +++++++++++++++++++++++ 3 files changed, 69 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h index d7ed5201ade6..c02128991ff7 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -67,6 +67,7 @@ struct uwb_dev { struct uwb_dev_addr dev_addr; int beacon_slot; DECLARE_BITMAP(streams, UWB_NUM_STREAMS); + DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS); }; #define to_uwb_dev(d) container_of(d, struct uwb_dev, dev) @@ -109,6 +110,9 @@ struct uwbd { */ struct uwb_mas_bm { DECLARE_BITMAP(bm, UWB_NUM_MAS); + DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS); + int safe; + int unsafe; }; /** @@ -134,14 +138,24 @@ struct uwb_mas_bm { * FIXME: further target states TBD. */ enum uwb_rsv_state { - UWB_RSV_STATE_NONE, + UWB_RSV_STATE_NONE = 0, UWB_RSV_STATE_O_INITIATED, UWB_RSV_STATE_O_PENDING, UWB_RSV_STATE_O_MODIFIED, UWB_RSV_STATE_O_ESTABLISHED, + UWB_RSV_STATE_O_TO_BE_MOVED, + UWB_RSV_STATE_O_MOVE_EXPANDING, + UWB_RSV_STATE_O_MOVE_COMBINING, + UWB_RSV_STATE_O_MOVE_REDUCING, UWB_RSV_STATE_T_ACCEPTED, UWB_RSV_STATE_T_DENIED, + UWB_RSV_STATE_T_CONFLICT, UWB_RSV_STATE_T_PENDING, + UWB_RSV_STATE_T_EXPANDING_ACCEPTED, + UWB_RSV_STATE_T_EXPANDING_CONFLICT, + UWB_RSV_STATE_T_EXPANDING_PENDING, + UWB_RSV_STATE_T_EXPANDING_DENIED, + UWB_RSV_STATE_T_RESIZED, UWB_RSV_STATE_LAST, }; @@ -166,6 +180,12 @@ struct uwb_rsv_target { }; }; +struct uwb_rsv_move { + struct uwb_mas_bm final_mas; + struct uwb_ie_drp *companion_drp_ie; + struct uwb_mas_bm companion_mas; +}; + /* * Number of streams reserved for reservations targeted at DevAddrs. 
*/ @@ -203,6 +223,7 @@ typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv); * * @status: negotiation status * @stream: stream index allocated for this reservation + * @tiebreaker: conflict tiebreaker for this reservation * @mas: reserved MAS * @drp_ie: the DRP IE * @ie_valid: true iff the DRP IE matches the reservation parameters @@ -225,19 +246,22 @@ struct uwb_rsv { enum uwb_drp_type type; int max_mas; int min_mas; - int sparsity; + int max_interval; bool is_multicast; uwb_rsv_cb_f callback; void *pal_priv; enum uwb_rsv_state state; + bool needs_release_companion_mas; u8 stream; + u8 tiebreaker; struct uwb_mas_bm mas; struct uwb_ie_drp *drp_ie; + struct uwb_rsv_move mv; bool ie_valid; struct timer_list timer; - bool expired; + struct work_struct handle_timeout_work; }; static const @@ -279,6 +303,13 @@ struct uwb_drp_avail { bool ie_valid; }; +struct uwb_drp_backoff_win { + u8 window; + u8 n; + int total_expired; + struct timer_list timer; + bool can_reserve_extra_mases; +}; const char *uwb_rsv_state_str(enum uwb_rsv_state state); const char *uwb_rsv_type_str(enum uwb_drp_type type); @@ -294,6 +325,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv); void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv); +void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas); + /** * Radio Control Interface instance * @@ -364,12 +397,18 @@ struct uwb_rc { struct uwbd uwbd; + struct uwb_drp_backoff_win bow; struct uwb_drp_avail drp_avail; struct list_head reservations; + struct list_head cnflt_alien_list; + struct uwb_mas_bm cnflt_alien_bitmap; struct mutex rsvs_mutex; + spinlock_t rsvs_lock; struct workqueue_struct *rsv_workq; - struct work_struct rsv_update_work; + struct delayed_work rsv_update_work; + struct delayed_work rsv_alien_bp_work; + int set_drp_ie_pending; struct mutex ies_mutex; struct uwb_rc_cmd_set_ie *ies; size_t ies_capacity; diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h index 
07efbe17db53..8da004e25628 100644 --- a/include/linux/uwb/debug-cmd.h +++ b/include/linux/uwb/debug-cmd.h @@ -43,7 +43,7 @@ struct uwb_dbg_cmd_rsv_establish { __u8 type; __u16 max_mas; __u16 min_mas; - __u8 sparsity; + __u8 max_interval; }; struct uwb_dbg_cmd_rsv_terminate { diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h index a30436ea53aa..b52e44f1bd33 100644 --- a/include/linux/uwb/spec.h +++ b/include/linux/uwb/spec.h @@ -58,6 +58,11 @@ enum { UWB_NUM_ZONES = 16 }; */ #define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES) +/* + * Number of MAS required before a row can be considered available. + */ +#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1) + /* * Number of streams per DRP reservation between a pair of devices. * @@ -93,6 +98,26 @@ enum { UWB_BEACON_SLOT_LENGTH_US = 85 }; */ enum { UWB_MAX_LOST_BEACONS = 3 }; +/* + * mDRPBackOffWinMin + * + * The minimum number of superframes to wait before trying to reserve + * extra MAS. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_DRP_BACKOFF_WIN_MIN = 2 }; + +/* + * mDRPBackOffWinMax + * + * The maximum number of superframes to wait before trying to reserve + * extra MAS. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_DRP_BACKOFF_WIN_MAX = 16 }; + /* * Length of a superframe in microseconds. */ -- cgit v1.2.3 From 27af4245b6ce99e08c6a7c38825383bf51119cc9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Dec 2008 14:18:13 -0800 Subject: posix-timers: use "struct pid*" instead of "struct task_struct*" Impact: restructure, clean up code k_itimer holds the ref to the ->it_process until sys_timer_delete(). This allows to pin up to RLIMIT_SIGPENDING dead task_struct's. Change the code to use "struct pid *" instead. The patch doesn't kill ->it_process, it places ->it_pid into the union. ->it_process is still used by do_cpu_nanosleep() as before. 
It would be trivial to change the nanosleep code as well, but since it uses it_process in a special way I think it is better to keep this field for grep. The patch bloats the kernel by 104 bytes and it also adds the new pointer, ->it_signal, to k_itimer. It is used by lock_timer() to verify that the found timer was not created by another process. It is not clear why do we use the global database (and thus the global idr_lock) for posix timers. We still need the signal_struct->posix_timers which contains all useable timers, perhaps it is better to use some form of per-process array instead. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7c721355549..4f71bf4e628c 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -45,7 +45,11 @@ struct k_itimer { int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ - struct task_struct *it_process; /* process to send signal to */ + struct signal_struct *it_signal; + union { + struct pid *it_pid; /* pid of process to send signal to */ + struct task_struct *it_process; /* for clock_nanosleep */ + }; struct sigqueue *sigq; /* signal queue entry. */ union { struct { -- cgit v1.2.3 From c29541b24fb2c6301021637229ae5347c877330a Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 1 Dec 2008 14:18:11 -0800 Subject: linux/timex.h: cleanup for userspace Impact: fix user-space exported use Move all the kernel-specific defines and includes into the __KERNEL__ section so that they don't get leaked into userspace. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 73 ++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 9007313b5b71..998a55d80acf 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,46 +53,10 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -#include #include -#include - #define NTP_API 4 /* NTP API version */ -/* - * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen - * for a slightly underdamped convergence characteristic. SHIFT_KH - * establishes the damping of the FLL and is chosen by wisdom and black - * art. - * - * MAXTC establishes the maximum time constant of the PLL. With the - * SHIFT_KG and SHIFT_KF values given and a time constant range from - * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, - * respectively. - */ -#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ -#define SHIFT_FLL 2 /* FLL frequency factor (shift) */ -#define MAXTC 10 /* maximum time constant (shift) */ - -/* - * SHIFT_USEC defines the scaling (shift) of the time_freq and - * time_tolerance variables, which represent the current frequency - * offset and maximum frequency tolerance. 
- */ -#define SHIFT_USEC 16 /* frequency offset scale (shift) */ -#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) -#define PPM_SCALE_INV_SHIFT 19 -#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ - PPM_SCALE + 1) - -#define MAXPHASE 500000000l /* max phase error (ns) */ -#define MAXFREQ 500000 /* max frequency error (ns/s) */ -#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT) -#define MINSEC 256 /* min interval between updates (s) */ -#define MAXSEC 2048 /* max interval between updates (s) */ -#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ - /* * syscall interface - used (mainly by NTP daemon) * to discipline kernel clock oscillator @@ -199,8 +163,45 @@ struct timex { #define TIME_BAD TIME_ERROR /* bw compat */ #ifdef __KERNEL__ +#include +#include +#include + #include +/* + * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen + * for a slightly underdamped convergence characteristic. SHIFT_KH + * establishes the damping of the FLL and is chosen by wisdom and black + * art. + * + * MAXTC establishes the maximum time constant of the PLL. With the + * SHIFT_KG and SHIFT_KF values given and a time constant range from + * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, + * respectively. + */ +#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ +#define SHIFT_FLL 2 /* FLL frequency factor (shift) */ +#define MAXTC 10 /* maximum time constant (shift) */ + +/* + * SHIFT_USEC defines the scaling (shift) of the time_freq and + * time_tolerance variables, which represent the current frequency + * offset and maximum frequency tolerance. 
+ */ +#define SHIFT_USEC 16 /* frequency offset scale (shift) */ +#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) +#define PPM_SCALE_INV_SHIFT 19 +#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ + PPM_SCALE + 1) + +#define MAXPHASE 500000000l /* max phase error (ns) */ +#define MAXFREQ 500000 /* max frequency error (ns/s) */ +#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT) +#define MINSEC 256 /* min interval between updates (s) */ +#define MAXSEC 2048 /* max interval between updates (s) */ +#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ + /* * kernel variables * Note: maximum error = NTP synch distance = dispersion + delay / 2; -- cgit v1.2.3 From bb608e9db7d29616fb6e0d856c23434610d4a1bd Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Thu, 4 Dec 2008 20:38:13 +0530 Subject: wireless: Incorrect LEAP authentication algorithm identifier. This patch fixes a regression introduced by "wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts" LEAP authentication algorithm identifier should be 128. Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a6ec928186ad..c4e6ca1a6306 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -836,7 +836,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_LEAP 2 +#define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 -- cgit v1.2.3 From 4dec9b807be757780ca3611a959ac22c28d292a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Dec 2008 17:48:48 +0100 Subject: rfkill: strip pointless notifier chain No users, so no reason to have it. 
Signed-off-by: Johannes Berg Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index f376a93927f7..164332cbb77c 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -149,11 +149,4 @@ static inline char *rfkill_get_led_name(struct rfkill *rfkill) #endif } -/* rfkill notification chain */ -#define RFKILL_STATE_CHANGED 0x0001 /* state of a normal rfkill - switch has changed */ - -int register_rfkill_notifier(struct notifier_block *nb); -int unregister_rfkill_notifier(struct notifier_block *nb); - #endif /* RFKILL_H */ -- cgit v1.2.3 From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:25 +1030 Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse, and cpulist_scnprintf to take pointers. Impact: change calling convention of existing cpumask APIs Most cpumask functions started with cpus_: these have been replaced by cpumask_ ones which take struct cpumask pointers as expected. These four functions don't have good replacement names; fortunately they're rarely used, so we just change them over. 
Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Cc: paulus@samba.org Cc: mingo@redhat.com Cc: tony.luck@intel.com Cc: ralf@linux-mips.org Cc: Greg Kroah-Hartman Cc: cl@linux-foundation.org Cc: srostedt@redhat.com --- include/linux/cpumask.h | 87 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 21e1dd43e52a..94a2ab88ae85 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -339,36 +339,6 @@ extern cpumask_t cpu_mask_all; #endif #define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) -#define cpumask_scnprintf(buf, len, src) \ - __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpumask_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - -#define cpumask_parse_user(ubuf, ulen, dst) \ - __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse_user(const char __user *buf, int len, - cpumask_t *dstp, int nbits) -{ - return bitmap_parse_user(buf, len, dstp->bits, nbits); -} - -#define cpulist_scnprintf(buf, len, src) \ - __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpulist_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - -#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) -static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) -{ - return bitmap_parselist(buf, dstp->bits, nbits); -} - #define cpu_remap(oldbit, old, new) \ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) static inline int __cpu_remap(int oldbit, @@ -945,6 +915,63 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) +/** + * cpumask_scnprintf - print a cpumask into a string as 
comma-separated hex + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpumask_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpumask_parse_user - extract a cpumask from a user string + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parse_user(const char __user *buf, int len, + struct cpumask *dstp) +{ + return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); +} + +/** + * cpulist_scnprintf - print a cpumask into a string as comma-separated list + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpulist_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpulist_parse_user - extract a cpumask from a user string of ranges + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. 
+ */ +static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +{ + return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); +} + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap -- cgit v1.2.3 From 0de26520c7cabf36e1de090ea8092f011a6106ce Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:26 +1030 Subject: cpumask: make irq_set_affinity() take a const struct cpumask Impact: change existing irq_chip API Not much point with gentle transition here: the struct irq_chip's setaffinity method signature needs to change. Fortunately, not widely used code, but hits a few architectures. Note: In irq_select_affinity() I save a temporary in by mangling irq_desc[irq].affinity directly. Ingo, does this break anything? (Folded in fix from KOSAKI Motohiro) Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Reviewed-by: Grant Grundler Acked-by: Ingo Molnar Cc: ralf@linux-mips.org Cc: grundler@parisc-linux.org Cc: jeremy@xensource.com Cc: KOSAKI Motohiro --- include/linux/interrupt.h | 4 ++-- include/linux/irq.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f58a0cf8929a..48e63934fabe 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -109,13 +109,13 @@ extern void enable_irq(unsigned int irq); extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } diff --git a/include/linux/irq.h b/include/linux/irq.h index 3dddfa703ebd..ab70fd604d3a 100644 --- 
a/include/linux/irq.h +++ b/include/linux/irq.h @@ -113,7 +113,8 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, cpumask_t dest); + void (*set_affinity)(unsigned int irq, + const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); -- cgit v1.2.3 From 320ab2b0b1e08e3805a3e1084a2f0eb1938d5d67 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:26 +1030 Subject: cpumask: convert struct clock_event_device to cpumask pointers. Impact: change calling convention of existing clock_event APIs struct clock_event_timer's cpumask field gets changed to take pointer, as does the ->broadcast function. Another single-patch change. For safety, we BUG_ON() in clockevents_register_device() if it's not set. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- include/linux/clockchips.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index ed3a5d473e52..cea153697ec7 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -82,13 +82,13 @@ struct clock_event_device { int shift; int rating; int irq; - cpumask_t cpumask; + const struct cpumask *cpumask; int (*set_next_event)(unsigned long evt, struct clock_event_device *); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); void (*event_handler)(struct clock_event_device *); - void (*broadcast)(cpumask_t mask); + void (*broadcast)(const struct cpumask *mask); struct list_head list; enum clock_event_mode mode; ktime_t next_event; -- cgit v1.2.3 From 7be7585393d311866653564fbcd10a3232773c0b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:28 +1030 Subject: cpumask: Use all NR_CPUS bits unless CONFIG_CPUMASK_OFFSTACK Impact: futureproof as we convert more code to 
new APIs The old cpumask operators treat all NR_CPUS bits as relevant, the new ones use nr_cpumask_bits. For large NR_CPUS and small nr_cpu_ids, this makes a difference. However, mixing the two can cause problems with undefined bits. An arch which sets CONFIG_CPUMASK_OFFSTACK should have converted across to the new operators, so it's safe in that case. (Thanks to Stephen Rothwell for bisecting the initial unused-bits bug, and Mike Travis for this solution). Signed-off-by: Rusty Russell Cc: Mike Travis --- include/linux/cpumask.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 94a2ab88ae85..d4bf52603e6b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -510,9 +510,6 @@ extern cpumask_t cpu_active_map; [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } -/* This produces more efficient code. */ -#define nr_cpumask_bits NR_CPUS - #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ @@ -520,9 +517,15 @@ extern cpumask_t cpu_active_map; [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } +#endif /* NR_CPUS > BITS_PER_LONG */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, + * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids -#endif /* NR_CPUS > BITS_PER_LONG */ +#else +#define nr_cpumask_bits NR_CPUS +#endif /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) -- cgit v1.2.3 From b690ace50be7d10d77cb7a6d5ef1bd9de649852f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 21 Oct 2008 14:07:03 +0100 Subject: [ARM] S3C6400: serial support for S3C6400 and S3C6410 SoCs Add support to the Samsung serial driver for the S3C6400 and S3C6410 serial ports.
Signed-off-by: Ben Dooks --- include/linux/serial_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4e4f1277f3bf..feb3b939ec4b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -158,6 +158,8 @@ /* SH-SCI */ #define PORT_SCIFA 83 +#define PORT_S3C6400 84 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From b53c7583e26746ef6f66c866841e10450150ed8e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 4 Dec 2008 10:01:52 -0800 Subject: rapidio: struct device - replace bus_id with dev_name(), dev_set_name() Cc: Matt Porter Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: Paul Mackerras --- include/linux/rio_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 90987b7bcc1b..32c0547ffafc 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -427,9 +427,9 @@ void rio_dev_put(struct rio_dev *); * Get the unique RIO device identifier. Returns the device * identifier string. */ -static inline char *rio_name(struct rio_dev *rdev) +static inline const char *rio_name(struct rio_dev *rdev) { - return rdev->dev.bus_id; + return dev_name(&rdev->dev); } /** -- cgit v1.2.3 From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:27:47 -0800 Subject: net: Add frag_list support to GSO This patch allows GSO to handle frag_list in a limited way for the purposes of allowing packets merged by GRO to be refragmented on output. Most hardware won't (and aren't expected to) support handling GRO frag_list packets directly. Therefore we will perform GSO in software for those cases. However, for drivers that can support it (such as virtual NICs) we may not have to segment the packets at all. 
Whether the added overhead of GRO/GSO is worthwhile for bridges and routers when weighed against the benefit of potentially increasing the MTU within the host is still an open question. However, for the case of host nodes this is undoubtedly a win. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b60c26b7d31c..bdf5465deb91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || + (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } -- cgit v1.2.3 From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:38:52 -0800 Subject: net: Add Generic Receive Offload infrastructure This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb or just call netif_rx. The latter will call napi_receive_skb automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. 
Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. For each packet that may match, they also have a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet) where the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. Currently held packets are stored in a singly linked list just like LRO. The list is limited to a maximum of 8 entries. In future, this may be expanded to use a hash table to allow more flows to be held for merging. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 80 +++++++++++++++++------------------------------ include/linux/netpoll.h | 5 --- 2 files changed, 28 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465deb91..58856b6737fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,8 +314,9 @@ struct napi_struct { spinlock_t poll_lock; int poll_owner; struct net_device *dev; - struct list_head dev_list; #endif + struct list_head dev_list; + struct sk_buff *gro_list; }; enum @@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} +extern void __napi_complete(struct napi_struct *n); +extern void napi_complete(struct napi_struct *n); /** * napi_disable - prevent NAPI from scheduling @@ -640,9 +627,7 @@ struct net_device unsigned long state; struct list_head dev_list; -#ifdef CONFIG_NETPOLL struct list_head napi_list; -#endif /* Net device features */ unsigned long features; @@ -661,6 +646,7 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) * netif_napi_add() must be used to initialize a napi context prior to calling * *any* of the other napi related functions. 
*/ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_del - remove a napi context @@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, * * netif_napi_del() removes a napi context from the network device napi list */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} +void netif_napi_del(struct napi_struct *napi); + +struct napi_gro_cb { + /* This is non-zero if the packet may be of the same flow. */ + int same_flow; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + int flush; + + /* Number of segments aggregated. */ + int count; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ @@ -1024,6 +1004,9 @@ struct packet_type { struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); +extern int napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, static inline void netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); + napi_complete(napi); } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d79593fb3a..e38d3c9dccda 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) rcu_read_unlock(); } -static inline void netpoll_netdev_init(struct net_device *dev) -{ - INIT_LIST_HEAD(&dev->napi_list); -} - #else static inline int netpoll_rx(struct sk_buff *skb) { -- cgit v1.2.3 From 71d93b39e52e92aea35f1058d957cf12250d0b75 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:42:33 -0800 Subject: net: Add skb_gro_receive This patch adds the 
helper skb_gro_receive to merge packets for GRO. The current method is to allocate a new header skb and then chain the original packets to its frag_list. This is done to make it easier to integrate into the existing GSO framework. In future as GSO is moved into the drivers, we can undo this and simply chain the original packets together. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acf17af45af9..cf2cb50f77d1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1687,6 +1687,8 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern int skb_gro_receive(struct sk_buff **head, + struct sk_buff *skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) -- cgit v1.2.3 From b240a0e5644eb817c4a397098a40e1ad42a615bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:44:31 -0800 Subject: ethtool: Add GGRO and SGRO ops This patch adds the ethtool ops to enable and disable GRO. It also makes GRO depend on RX checksum offload much the same as how TSO depends on SG support. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b4b038b89ee6..27c67a542235 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -467,6 +467,8 @@ struct ethtool_ops { #define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ #define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET -- cgit v1.2.3 From e18ce3465477502108187c6c08b6423fb784a313 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:00 -0800 Subject: net: Move flow control definitions to mii.h flags used within drivers for indicating tx and rx flow control are defined in 4 drivers (and probably more), move these constants to mii.h. The 3 SMSC drivers use the same constants (FLOW_CTRL_TX), but TG3 uses TG3_FLOW_CTRL_TX, so this patch also renames the constants within TG3. Signed-off-by: Steve Glendinning Signed-off-by: David S. 
Miller --- include/linux/mii.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 151b7e0182c7..4a376e0816fd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -135,6 +135,10 @@ #define LPA_1000FULL 0x0800 /* Link partner 1000BASE-T full duplex */ #define LPA_1000HALF 0x0400 /* Link partner 1000BASE-T half duplex */ +/* Flow control flags */ +#define FLOW_CTRL_TX 0x01 +#define FLOW_CTRL_RX 0x02 + /* This structure is used in all SIOCxMIIxxx ioctl calls */ struct mii_ioctl_data { __u16 phy_id; -- cgit v1.2.3 From bc02ff95fe4ebd3e5ee7455c0aa6f76ebe39ebca Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:48 -0800 Subject: net: Refactor full duplex flow control resolution These 4 drivers have identical full duplex flow control resolution functions. This patch changes them all to use one common function. The function in question decides whether a device should enable TX and RX flow control in a standard way (IEEE 802.3-2005 table 28B-3), so this should also be useful for other drivers. Signed-off-by: Steve Glendinning Signed-off-by: David S. 
Miller --- include/linux/mii.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 4a376e0816fd..ad748588faf1 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -239,5 +239,34 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, return 0; } +/** + * mii_resolve_flowctrl_fdx + * @lcladv: value of MII ADVERTISE register + * @rmtadv: value of MII LPA register + * + * Resolve full duplex flow control as per IEEE 802.3-2005 table 28B-3 + */ +static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_PAUSE_CAP) { + if (lcladv & ADVERTISE_PAUSE_ASYM) { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + else if (rmtadv & LPA_PAUSE_ASYM) + cap = FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_PAUSE_ASYM) { + if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + cap = FLOW_CTRL_TX; + } + + return cap; +} + #endif /* __KERNEL__ */ #endif /* __LINUX_MII_H__ */ -- cgit v1.2.3 From b24a2516d10751d7ed5afb58420df25370c9dffb Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:06:23 -0800 Subject: ipv6: Add IPV6_PKTINFO sticky option support to setsockopt() There are three reasons for me to add this support: 1. When no interface is specified in an IPV6_PKTINFO ancillary data item, the interface specified in an IPV6_PKTINFO sticky option is used. RFC3542: 6.7. Summary of Outgoing Interface Selection This document and [RFC-3493] specify various methods that affect the selection of the packet's outgoing interface. This subsection summarizes the ordering among those in order to ensure deterministic behavior. For a given outgoing packet on a given socket, the outgoing interface is determined in the following order: 1.
if an interface is specified in an IPV6_PKTINFO ancillary data item, the interface is used. 2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky option, the interface is used. 2. When no IPV6_PKTINFO ancillary data is received, getsockopt() should return the sticky option value which was set with setsockopt(). RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: 3. Make the setsockopt implementation POSIX compliant. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 641e026eee8f..0b816cae533e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -278,6 +278,7 @@ struct ipv6_pinfo { struct in6_addr saddr; struct in6_addr rcv_saddr; struct in6_addr daddr; + struct in6_pktinfo sticky_pktinfo; struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES struct in6_addr *saddr_cache; -- cgit v1.2.3 From 8c5df16bec8a60bb8589fc232b9e26cac0ed4b2c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:17:26 -0800 Subject: swiotlb: allow architectures to override swiotlb pool allocation Impact: generalize swiotlb allocation code Architectures may need to allocate memory specially for use with the swiotlb. Create the weak functions swiotlb_alloc_boot() and swiotlb_alloc(), defaulting to the current behaviour.
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b18ec5533e8c..b8c5fc766a56 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -10,6 +10,9 @@ struct scatterlist; extern void swiotlb_init(void); +extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); +extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); -- cgit v1.2.3 From 0016fdee927f7aa0f428494bcf11ae60c7470a02 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:27 -0800 Subject: swiotlb: move some definitions to header Impact: cleanup Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b8c5fc766a56..58b996a642f9 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,20 @@ struct device; struct dma_attrs; struct scatterlist; +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. 
+ */ +#define IO_TLB_SHIFT 11 + extern void swiotlb_init(void); -- cgit v1.2.3 From b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Tue, 16 Dec 2008 15:29:15 -0800 Subject: gianfar: Convert gianfar to an of_platform_driver Does the same for the accompanying MDIO driver, and then modifies the TBI configuration method. The old way used fields in einfo, which no longer exists. The new way is to create an MDIO device-tree node for each instance of gianfar, and create a tbi-handle property to associate ethernet controllers with the TBI PHYs they are connected to. Signed-off-by: Andy Fleming Signed-off-by: David S. Miller --- include/linux/fsl_devices.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 708bab58d8d0..d9051d717d27 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -47,12 +47,7 @@ struct gianfar_platform_data { /* device specific information */ u32 device_flags; - /* board specific information */ - u32 board_flags; - int mdio_bus; /* Bus controlled by us */ - char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ - u32 phy_id; - u8 mac_addr[6]; + char bus_id[BUS_ID_SIZE]; phy_interface_t interface; }; @@ -61,17 +56,6 @@ struct gianfar_mdio_data { int irq[32]; }; -/* Flags related to gianfar device features */ -#define FSL_GIANFAR_DEV_HAS_GIGABIT 0x00000001 -#define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 -#define FSL_GIANFAR_DEV_HAS_RMON 0x00000004 -#define FSL_GIANFAR_DEV_HAS_MULTI_INTR 0x00000008 -#define FSL_GIANFAR_DEV_HAS_CSUM 0x00000010 -#define FSL_GIANFAR_DEV_HAS_VLAN 0x00000020 -#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH 0x00000040 -#define FSL_GIANFAR_DEV_HAS_PADDING 0x00000080 -#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET 0x00000100 - /* Flags in gianfar_platform_data */ #define FSL_GIANFAR_BRD_HAS_PHY_INTR 0x00000001 /* set or use a timer */ #define 
FSL_GIANFAR_BRD_IS_REDUCED 0x00000002 /* Set if RGMII, RMII */ -- cgit v1.2.3 From e08e1f7adba522378e8d2ae941bf25443866136d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:30 -0800 Subject: swiotlb: allow architectures to override phys<->bus<->phys conversions Impact: generalize phys<->bus<->phys conversions in the swiotlb code Architectures may need to override these conversions. Implement a __weak hook point containing the default implementation. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 58b996a642f9..694f1839cbc0 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -27,6 +27,9 @@ swiotlb_init(void); extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); +extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); -- cgit v1.2.3 From b81ea27b2329bf44b30c427800954f845896d476 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:31 -0800 Subject: swiotlb: add arch hook to force mapping Impact: generalize the sw-IOTLB range checks Some architectures require special rules to determine whether a range needs mapping or not. This adds a weak function for architectures to override. 
Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 694f1839cbc0..325af1de0351 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -30,6 +30,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); -- cgit v1.2.3 From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 17 Dec 2008 15:47:29 -0800 Subject: Phonet: allocate a non-Ethernet ARP type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also leave some room for more 802.11 types. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S.
Miller --- include/linux/if_arp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 4d3401812e6c..11df77ab2dbb 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,6 +87,8 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_PHONET 820 /* PhoNet media type */ + #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.2.3 From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 17 Dec 2008 15:47:48 -0800 Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate xmit lock class supports GPRS over a Phonet pipe over a TUN device (type ARPHRD_NONE). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 11df77ab2dbb..5ff89809a581 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -88,6 +88,7 @@ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_PHONET 820 /* PhoNet media type */ +#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.2.3 From f38f1d2aa5a3520cf05da7cd6bd12fe2b0c509b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Dec 2008 23:06:40 -0500 Subject: trace: add a way to enable or disable the stack tracer Impact: enhancement to stack tracer The stack tracer currently is either on when configured in or off when it is not.
It can not be disabled when it is configured on. (besides disabling the function tracer that it uses) This patch adds a way to enable or disable the stack tracer at run time. It defaults off on bootup, but a kernel parameter 'stacktrace' has been added to enable it on bootup. A new sysctl has been added "kernel.stack_tracer_enabled" to let the user enable or disable the stack tracer at run time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 44020f31bd81..6b0db53caa7d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,6 +86,14 @@ static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_STACK_TRACER +extern int stack_tracer_enabled; +int +stack_trace_sysctl(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + #ifdef CONFIG_DYNAMIC_FTRACE /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include -- cgit v1.2.3 From 9c2c48020ec0dd6ecd27e5a1298f73b40d85a595 Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Tue, 16 Dec 2008 23:41:22 -0800 Subject: schedstat: consolidate per-task cpu runtime stats Impact: simplify code When we turn on CONFIG_SCHEDSTATS, per-task cpu runtime is accumulated twice. Once in task->se.sum_exec_runtime and once in sched_info.cpu_time. These two stats are exactly the same. Given that task->se.sum_exec_runtime is always accumulated by the core scheduler, sched_info can reuse that data instead of duplicate the accounting. 
Signed-off-by: Ken Chen Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8cccd6dc5d66..2d1e840ddd35 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -670,8 +670,7 @@ struct reclaim_state; struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ - unsigned long long cpu_time, /* time spent on the cpu */ - run_delay; /* time spent waiting on a runqueue */ + unsigned long long run_delay; /* time spent waiting on a runqueue */ /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ -- cgit v1.2.3 From 40aa4a30d0fd075fb934de4ee8163056827052ab Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 16 Dec 2008 10:15:12 +0000 Subject: ASoC: Add WM8350 AudioPlus codec driver The WM8350 is an integrated audio and power management subsystem which provides a single-chip solution for portable audio and multimedia systems. The integrated audio CODEC provides all the necessary functions for high-quality stereo recording and playback. Programmable on-chip amplifiers allow for the direct connection of headphones and microphones with a minimum of external components. A programmable low-noise bias voltage is available to feed one or more electret microphones. Additional audio features include programmable high-pass filter in the ADC input path. This driver was originally written by Liam Girdwood with further updates from me. 
Signed-off-by: Mark Brown --- include/linux/mfd/wm8350/audio.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h index 217bb22ebb8e..af95a1d2f3a1 100644 --- a/include/linux/mfd/wm8350/audio.h +++ b/include/linux/mfd/wm8350/audio.h @@ -1,7 +1,7 @@ /* * audio.h -- Audio Driver for Wolfson WM8350 PMIC * - * Copyright 2007 Wolfson Microelectronics PLC + * Copyright 2007, 2008 Wolfson Microelectronics PLC * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -70,9 +70,9 @@ #define WM8350_CODEC_ISEL_0_5 3 /* x0.5 */ #define WM8350_VMID_OFF 0 -#define WM8350_VMID_500K 1 -#define WM8350_VMID_100K 2 -#define WM8350_VMID_10K 3 +#define WM8350_VMID_300K 1 +#define WM8350_VMID_50K 2 +#define WM8350_VMID_5K 3 /* * R40 (0x28) - Clock Control 1 @@ -591,8 +591,38 @@ #define WM8350_IRQ_CODEC_MICSCD 41 #define WM8350_IRQ_CODEC_MICD 42 +/* + * WM8350 Platform data. + * + * This must be initialised per platform for best audio performance. + * Please see WM8350 datasheet for information. 
+ */ +struct wm8350_audio_platform_data { + int vmid_discharge_msecs; /* VMID --> OFF discharge time */ + int drain_msecs; /* OFF drain time */ + int cap_discharge_msecs; /* Cap ON (from OFF) discharge time */ + int vmid_charge_msecs; /* vmid power up time */ + u32 vmid_s_curve:2; /* vmid enable s curve speed */ + u32 dis_out4:2; /* out4 discharge speed */ + u32 dis_out3:2; /* out3 discharge speed */ + u32 dis_out2:2; /* out2 discharge speed */ + u32 dis_out1:2; /* out1 discharge speed */ + u32 vroi_out4:1; /* out4 tie off */ + u32 vroi_out3:1; /* out3 tie off */ + u32 vroi_out2:1; /* out2 tie off */ + u32 vroi_out1:1; /* out1 tie off */ + u32 vroi_enable:1; /* enable tie off */ + u32 codec_current_on:2; /* current level ON */ + u32 codec_current_standby:2; /* current level STANDBY */ + u32 codec_current_charge:2; /* codec current @ vmid charge */ +}; + +struct snd_soc_codec; + struct wm8350_codec { struct platform_device *pdev; + struct snd_soc_codec *codec; + struct wm8350_audio_platform_data *platform_data; }; #endif -- cgit v1.2.3 From 64db4cfff99c04cd5f550357edcc8780f96b54a2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Dec 2008 21:55:32 +0100 Subject: "Tree RCU": scalable classic RCU implementation This patch fixes a long-standing performance bug in classic RCU that results in massive internal-to-RCU lock contention on systems with more than a few hundred CPUs. Although this patch creates a separate flavor of RCU for ease of review and patch maintenance, it is intended to replace classic RCU. This patch still handles stress better than does mainline, so I am still calling it ready for inclusion. This patch is against the -tip tree. Nevertheless, experience on an actual 1000+ CPU machine would still be most welcome. Most of the changes noted below were found while creating an rcutiny (which should permit ejecting the current rcuclassic) and while doing detailed line-by-line documentation. 
Updates from v9 (http://lkml.org/lkml/2008/12/2/334): o Fixes from remainder of line-by-line code walkthrough, including comment spelling, initialization, undesirable narrowing due to type conversion, removing redundant memory barriers, removing redundant local-variable initialization, and removing redundant local variables. I do not believe that any of these fixes address the CPU-hotplug issues that Andi Kleen was seeing, but please do give it a whirl in case the machine is smarter than I am. A writeup from the walkthrough may be found at the following URL, in case you are suffering from terminal insomnia or masochism: http://www.kernel.org/pub/linux/kernel/people/paulmck/tmp/rcutree-walkthrough.2008.12.16a.pdf o Made rcutree tracing use seq_file, as suggested some time ago by Lai Jiangshan. o Added a .csv variant of the rcudata debugfs trace file, to allow people having thousands of CPUs to drop the data into a spreadsheet. Tested with oocalc and gnumeric. Updated documentation to suit. Updates from v8 (http://lkml.org/lkml/2008/11/15/139): o Fix a theoretical race between grace-period initialization and force_quiescent_state() that could occur if more than three jiffies were required to carry out the grace-period initialization. Which it might, if you had enough CPUs. o Apply Ingo's printk-standardization patch. o Substitute local variables for repeated accesses to global variables. o Fix comment misspellings and redundant (but harmless) increments of ->n_rcu_pending (this latter after having explicitly added it). o Apply checkpatch fixes. Updates from v7 (http://lkml.org/lkml/2008/10/10/291): o Fixed a number of problems noted by Gautham Shenoy, including the cpu-stall-detection bug that he was having difficulty convincing me was real. ;-) o Changed cpu-stall detection to wait for ten seconds rather than three in order to reduce false positive, as suggested by Ingo Molnar. o Produced a design document (http://lwn.net/Articles/305782/). 
The act of writing this document uncovered a number of both theoretical and "here and now" bugs as noted below. o Fix dynticks_nesting accounting confusion, simplify WARN_ON() condition, fix kerneldoc comments, and add memory barriers in dynticks interface functions. o Add more data to tracing. o Remove unused "rcu_barrier" field from rcu_data structure. o Count calls to rcu_pending() from scheduling-clock interrupt to use as a surrogate timebase should jiffies stop counting. o Fix a theoretical race between force_quiescent_state() and grace-period initialization. Yes, initialization does have to go on for some jiffies for this race to occur, but given enough CPUs... Updates from v6 (http://lkml.org/lkml/2008/9/23/448): o Fix a number of checkpatch.pl complaints. o Apply review comments from Ingo Molnar and Lai Jiangshan on the stall-detection code. o Fix several bugs in !CONFIG_SMP builds. o Fix a misspelled config-parameter name so that RCU now announces at boot time if stall detection is configured. o Run tests on numerous combinations of configurations parameters, which after the fixes above, now build and run correctly. Updates from v5 (http://lkml.org/lkml/2008/9/15/92, bad subject line): o Fix a compiler error in the !CONFIG_FANOUT_EXACT case (blew a changeset some time ago, and finally got around to retesting this option). o Fix some tracing bugs in rcupreempt that caused incorrect totals to be printed. o I now test with a more brutal random-selection online/offline script (attached). Probably more brutal than it needs to be on the people reading it as well, but so it goes. o A number of optimizations and usability improvements: o Make rcu_pending() ignore the grace-period timeout when there is no grace period in progress. o Make force_quiescent_state() avoid going for a global lock in the case where there is no grace period in progress. o Rearrange struct fields to improve struct layout. 
o Make call_rcu() initiate a grace period if RCU was idle, rather than waiting for the next scheduling clock interrupt. o Invoke rcu_irq_enter() and rcu_irq_exit() only when idle, as suggested by Andi Kleen. I still don't completely trust this change, and might back it out. o Make CONFIG_RCU_TRACE be the single config variable manipulated for all forms of RCU, instead of the prior confusion. o Document tracing files and formats for both rcupreempt and rcutree. Updates from v4 for those missing v5 given its bad subject line: o Separated dynticks interface so that NMIs and irqs call separate functions, greatly simplifying it. In particular, this code no longer requires a proof of correctness. ;-) o Separated dynticks state out into its own per-CPU structure, avoiding the duplicated accounting. o The case where a dynticks-idle CPU runs an irq handler that invokes call_rcu() is now correctly handled, forcing that CPU out of dynticks-idle mode. o Review comments have been applied (thank you all!!!). For but one example, fixed the dynticks-ordering issue that Manfred pointed out, saving me much debugging. ;-) o Adjusted rcuclassic and rcupreempt to handle dynticks changes. Attached is an updated patch to Classic RCU that applies a hierarchy, greatly reducing the contention on the top-level lock for large machines. This passes 10-hour concurrent rcutorture and online-offline testing on 128-CPU ppc64 without dynticks enabled, and exposes some timekeeping bugs in presence of dynticks (exciting working on a system where "sleep 1" hangs until interrupted...), which were fixed in the 2.6.27 kernel. It is getting more reliable than mainline by some measures, so the next version will be against -tip for inclusion. See also Manfred Spraul's recent patches (or his earlier work from 2004 at http://marc.info/?l=linux-kernel&m=108546384711797&w=2). We will converge onto a common patch in the fullness of time, but are currently exploring different regions of the design space. 
That said, I have already gratefully stolen quite a few of Manfred's ideas. This patch provides CONFIG_RCU_FANOUT, which controls the bushiness of the RCU hierarchy. Defaults to 32 on 32-bit machines and 64 on 64-bit machines. If CONFIG_NR_CPUS is less than CONFIG_RCU_FANOUT, there is no hierarchy. By default, the RCU initialization code will adjust CONFIG_RCU_FANOUT to balance the hierarchy, so strongly NUMA architectures may choose to set CONFIG_RCU_FANOUT_EXACT to disable this balancing, allowing the hierarchy to be exactly aligned to the underlying hardware. Up to two levels of hierarchy are permitted (in addition to the root node), allowing up to 16,384 CPUs on 32-bit systems and up to 262,144 CPUs on 64-bit systems. I just know that I am going to regret saying this, but this seems more than sufficient for the foreseeable future. (Some architectures might wish to set CONFIG_RCU_FANOUT=4, which would limit such architectures to 64 CPUs. If this becomes a real problem, additional levels can be added, but I doubt that it will make a significant difference on real hardware.) In the common case, a given CPU will manipulate its private rcu_data structure and the rcu_node structure that it shares with its immediate neighbors. This can reduce both lock and memory contention by multiple orders of magnitude, which should eliminate the need for the strange manipulations that are reported to be required when running Linux on very large systems. Some shortcomings: o More bugs will probably surface as a result of an ongoing line-by-line code inspection. Patches will be provided as required. o There are probably hangs, rcutorture failures, &c. Seems quite stable on a 128-CPU machine, but that is kind of small compared to 4096 CPUs. However, seems to do better than mainline. Patches will be provided as required. o The memory footprint of this version is several KB larger than rcuclassic. 
A separate UP-only rcutiny patch will be provided, which will reduce the memory footprint significantly, even compared to the old rcuclassic. One such patch passes light testing, and has a memory footprint smaller even than rcuclassic. Initial reaction from various embedded guys was "it is not worth it", so am putting it aside. Credits: o Manfred Spraul for ideas, review comments, and bugs spotted, as well as some good friendly competition. ;-) o Josh Triplett, Ingo Molnar, Peter Zijlstra, Mathieu Desnoyers, Lai Jiangshan, Andi Kleen, Andy Whitcroft, and Andrew Morton for reviews and comments. o Thomas Gleixner for much-needed help with some timer issues (see patches below). o Jon M. Tollefson, Tim Pepper, Andrew Theurer, Jose R. Santos, Andy Whitcroft, Darrick Wong, Nishanth Aravamudan, Anton Blanchard, Dave Kleikamp, and Nathan Lynch for keeping machines alive despite my heavy abuse^Wtesting. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 14 +- include/linux/rcupdate.h | 10 +- include/linux/rcutree.h | 329 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 344 insertions(+), 9 deletions(-) create mode 100644 include/linux/rcutree.h (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a0..9b70b9231693 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +extern void rcu_nmi_enter(void); +extern void rcu_nmi_exit(void); #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) -#endif /* CONFIG_PREEMPT_RCU */ +# define rcu_nmi_enter() do { } while (0) +# define rcu_nmi_exit() do { } while (0) +#endif /* #if defined(CONFIG_NO_HZ) && 
!defined(CONFIG_CLASSIC_RCU) */ /* * It is safe to do non-atomic ops on ->hardirq_context, @@ -134,7 +138,6 @@ extern void rcu_irq_exit(void); */ #define __irq_enter() \ do { \ - rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -153,7 +156,6 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ - rcu_irq_exit(); \ } while (0) /* @@ -161,7 +163,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e33..bfd289aff576 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,15 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -#ifdef CONFIG_CLASSIC_RCU +#if defined(CONFIG_CLASSIC_RCU) #include -#else /* #ifdef CONFIG_CLASSIC_RCU */ +#elif defined(CONFIG_TREE_RCU) +#include +#elif defined(CONFIG_PREEMPT_RCU) #include -#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ +#else +#error "Unknown RCU implementation specified to kernel configuration" +#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h new file mode 100644 index 000000000000..d4368b7975c3 --- /dev/null +++ b/include/linux/rcutree.h @@ -0,0 +1,329 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Dipankar Sarma + * Paul E. McKenney Hierarchical algorithm + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_RCUTREE_H +#define __LINUX_RCUTREE_H + +#include +#include +#include +#include +#include +#include + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. 
+ */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. 
*/ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. */ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). 
+ */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) state to allow this CPU to force_quiescent_state on others */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rcu_pending_force_qs; /* when to force quiescent states. */ + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. This hierarchy is + * represented in "heap" form in a dense array. 
The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. 
*/ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +extern struct rcu_state rcu_state; +DECLARE_PER_CPU(struct rcu_data, rcu_data); + +extern struct rcu_state rcu_bh_state; +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); + __acquire(RCU); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + preempt_enable(); +} +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); + __acquire(RCU_BH); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + local_bh_enable(); +} + +#define __synchronize_sched() synchronize_rcu() + +#define call_rcu_sched(head, func) call_rcu(head, func) + +static inline void rcu_init_sched(void) +{ +} + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); + +extern long 
rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +#ifdef CONFIG_NO_HZ +void rcu_enter_nohz(void); +void rcu_exit_nohz(void); +#else /* CONFIG_NO_HZ */ +static inline void rcu_enter_nohz(void) +{ +} +static inline void rcu_exit_nohz(void) +{ +} +#endif /* CONFIG_NO_HZ */ + +#endif /* __LINUX_RCUTREE_H */ -- cgit v1.2.3 From 3c8bb73ace6249bd089b70c941440441940e3365 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:27 -0800 Subject: x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3 Impact: Code transformation, new functions added should have no effect. Drivers use mmap followed by pgprot_* and remap_pfn_range or vm_insert_pfn, in order to export reserved memory to userspace. Currently, such mappings are not tracked and hence not kept consistent with other mappings (/dev/mem, pci resource, ioremap) for the same memory, that may exist in the system. The following patchset adds x86 PAT attribute tracking and untracking for pfnmap related APIs. First three patches in the patchset are changing the generic mm code to fit in this tracking. Last four patches are x86 specific to make things work with x86 PAT code. The patchset also introduces pgprot_writecombine interface, which gives writecombine mapping when enabled, falling back to pgprot_noncached otherwise. This patch: While working on x86 PAT, we faced some hurdles with tracking remap_pfn_range() regions, as we do not have any information to say whether that PFNMAP mapping is linear for the entire vma range or it is smaller granularity regions within the vma. A simple solution to this is to use vm_pgoff as an indicator for linear mapping over the vma region. Currently, remap_pfn_range only sets vm_pgoff for COW mappings. Below patch changes the logic and sets the vm_pgoff irrespective of COW. This will still not be enough for the case where pfn is zero (vma region mapped to physical address zero).
But, for all the other cases, we can look at pfnmap VMAs and say whether the mapping is for the entire vma region or not. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..2be8d9b5e46f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,15 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) +{ + return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); +} + +static inline int is_pfn_mapping(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_PFNMAP); +} /* * vm_fault is filled by the the pagefault handler and passed to the vma's -- cgit v1.2.3 From e121e418441525b5636321fe03d16f0193ad218e Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:28 -0800 Subject: x86: PAT: add follow_pfnmp_pte routine to help tracking pfnmap pages - v3 Impact: New currently unused interface. Add a generic interface to follow pfn in a pfnmap vma range. This is used by one of the subsequent x86 PAT related patches to keep track of memory types for vma regions across vma copy and free. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H.
Peter Anvin --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2be8d9b5e46f..a25024ff9c11 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +int follow_pfnmap_pte(struct vm_area_struct *vma, + unsigned long address, pte_t *ret_ptep); + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, -- cgit v1.2.3 From 2ab640379a0ab4cef746ced1d7e04a0941774bcb Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:29 -0800 Subject: x86: PAT: hooks in generic vm code to help archs to track pfnmap regions - v3 Impact: Introduces new hooks, which are currently null. Introduce generic hooks in remap_pfn_range and vm_insert_pfn and corresponding copy and free routines with reserve and free tracking. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a25024ff9c11..87ecb40e11a0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,6 +155,12 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } +extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size); +extern int track_pfn_vma_copy(struct vm_area_struct *vma); +extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size); + /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. 
The vma's ->fault is responsible for returning a bitmask -- cgit v1.2.3 From 7b4967c532045a1983d6d4af5c69cc7c5109f62b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 19 Dec 2008 16:56:37 +1030 Subject: cpumask: Add alloc_cpumask_var_node() Impact: New API This will be needed in x86 code to allocate the domain and old_domain cpumasks on the same node as where the containing irq_cfg struct is allocated. (Also fixes double-dump_stack on rare CONFIG_DEBUG_PER_CPU_MAPS case) Signed-off-by: Mike Travis Signed-off-by: Rusty Russell (re-impl alloc_cpumask_var) --- include/linux/cpumask.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d4bf52603e6b..b5ad19a6f43f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1025,6 +1025,7 @@ static inline size_t cpumask_size(void) #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); @@ -1038,6 +1039,12 @@ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) return true; } +static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -- cgit v1.2.3 From e057d7aea9d8f2a46cd440d8bfb72245d4e72d79 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 15 Dec 2008 20:26:48 -0800 Subject: cpumask: add sysfs displays for configured and disabled cpu maps Impact: add new sysfs files. Add sysfs files "kernel_max" and "offline" to display the max CPU index allowed (NR_CPUS-1), and the map of cpus that are offline. 
Cpus can be offlined via HOTPLUG, disabled by the BIOS ACPI tables, or if they exceed the number of cpus allowed by the NR_CPUS config option, or the "maxcpus=NUM" kernel start parameter. The "possible_cpus=NUM" parameter can also extend the number of possible cpus allowed, in which case the cpus not present at startup will be in the offline state. (These cpus can be HOTPLUGGED ON after system startup [pending a follow-on patch to provide the capability via the /sys/devices/sys/cpu/cpuN/online mechanism to bring them online.]) By design, the "offlined cpus > possible cpus" display will always use the following formats: * all possible cpus online: "x$" or "x-y$" * some possible cpus offline: ".*,x$" or ".*,x-y$" where: x == number of possible cpus (nr_cpu_ids); and y == number of cpus >= NR_CPUS or maxcpus (if y > x). One use of this feature is for distros to select (or configure) the appropriate kernel to install for the resident system. Notes: * cpus offlined <= possible cpus will be printed for all architectures. * cpus offlined > possible cpus will only be printed for arches that set 'total_cpus' [X86 only in this patch]. Based on tip/cpus4096 + .../rusty/linux-2.6-for-ingo.git/master + x86-only-patches sent 12/15. 
Signed-off-by: Mike Travis Signed-off-by: Rusty Russell --- include/linux/smp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 3f9a60043a97..0d5770c2e43a 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -21,6 +21,9 @@ struct call_single_data { u16 priv; }; +/* total number of cpus in this system (may exceed NR_CPUS) */ +extern unsigned int total_cpus; + #ifdef CONFIG_SMP #include -- cgit v1.2.3 From 716707b29906e1d8d190defe3d646610b097a861 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Thu, 18 Dec 2008 23:26:02 +0530 Subject: sched: convert BALANCE_FOR_xx_POWER to inline functions Impact: cleanup BALANCE_FOR_MC_POWER and similar macros defined in sched.h are not constants and have various condition checks and significant amount of code that is not suitable to be contained in a macro. Also there could be side effects on the expressions passed to some of them like test_sd_parent(). This patch converts all complex macros related to power savings balance to inline functions. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Balbir Singh Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 33 ++++++++++++++++++++++++--------- include/linux/topology.h | 4 ++-- 2 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4240f6bfa812..1210fb0e45ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -763,15 +763,23 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -#define BALANCE_FOR_MC_POWER \ - (sched_smt_power_savings ?
SD_POWERSAVINGS_BALANCE : 0) +extern int sched_mc_power_savings, sched_smt_power_savings; + +static inline int sd_balance_for_mc_power(void) +{ + if (sched_smt_power_savings) + return SD_POWERSAVINGS_BALANCE; -#define BALANCE_FOR_PKG_POWER \ - ((sched_mc_power_savings || sched_smt_power_savings) ? \ - SD_POWERSAVINGS_BALANCE : 0) + return 0; +} -#define test_sd_parent(sd, flag) ((sd->parent && \ - (sd->parent->flags & flag)) ? 1 : 0) +static inline int sd_balance_for_package_power(void) +{ + if (sched_mc_power_savings | sched_smt_power_savings) + return SD_POWERSAVINGS_BALANCE; + + return 0; +} struct sched_group { @@ -1399,6 +1407,15 @@ struct task_struct { #endif }; +/* Test a flag in parent sched domain */ +static inline int test_sd_parent(struct sched_domain *sd, int flag) +{ + if (sd->parent && (sd->parent->flags & flag)) + return 1; + + return 0; +} + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH @@ -2256,8 +2273,6 @@ __trace_special(void *__tr, void *__data, extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern int sched_mc_power_savings, sched_smt_power_savings; - extern void normalize_rt_tasks(void); #ifdef CONFIG_GROUP_SCHED diff --git a/include/linux/topology.h b/include/linux/topology.h index 0c5b5ac36d8e..0ce7c0dac06c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -125,7 +125,7 @@ int arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | BALANCE_FOR_MC_POWER, \ + | sd_balance_for_mc_power(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -150,7 +150,7 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | BALANCE_FOR_PKG_POWER,\ + | sd_balance_for_package_power(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } -- cgit v1.2.3 From 
afb8a9b70b86866a60e08b2956ae4e1406390336 Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Thu, 18 Dec 2008 23:26:09 +0530 Subject: sched: framework for sched_mc/smt_power_savings=N Impact: extend range of /sys/devices/system/cpu/sched_mc_power_savings Currently the sched_mc/smt_power_savings variable is a boolean, which either enables or disables topology based power savings. This patch extends the behaviour of the variable from boolean to multivalued, such that based on the value, we decide how aggressively do we want to perform powersavings balance at appropriate sched domain based on topology. Variable levels of power saving tunable would benefit end user to match the required level of power savings vs performance trade-off depending on the system configuration and workloads. This version makes the sched_mc_power_savings global variable to take more values (0,1,2). Later versions can have a single tunable called sched_power_savings instead of sched_{mc,smt}_power_savings. Signed-off-by: Gautham R Shenoy Signed-off-by: Vaidyanathan Srinivasan Acked-by: Balbir Singh Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1210fb0e45ff..a96726658eca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -763,6 +763,17 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ +enum powersavings_balance_level { + POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ + POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package + * first for long running threads + */ + POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle + * cpu package for power savings + */ + MAX_POWERSAVINGS_BALANCE_LEVELS +}; + extern int sched_mc_power_savings, sched_smt_power_savings; 
static inline int sd_balance_for_mc_power(void) -- cgit v1.2.3 From 100fdaee70ebf5f31b9451fbc01300c627091328 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Thu, 18 Dec 2008 23:26:47 +0530 Subject: sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0 Impact: change task balancing to save power more aggressively Add SD_BALANCE_NEWIDLE flag at MC level and CPU level if sched_mc is set. This helps power savings and will not affect performance when sched_mc=0 Ingo and Mike Galbraith have optimised the SD flags by removing SD_BALANCE_NEWIDLE at MC and CPU level. This helps performance but hurts power savings since this slows down task consolidation by reducing the number of times load_balance is run. sched: fine-tune SD_MC_INIT commit 14800984706bf6936bbec5187f736e928be5c218 Author: Mike Galbraith Date: Fri Nov 7 15:26:50 2008 +0100 sched: re-tune balancing -- revert commit 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Author: Ingo Molnar Date: Wed Nov 5 16:52:08 2008 +0100 This patch selectively enables SD_BALANCE_NEWIDLE flag only when sched_mc is set to 1 or 2. This helps power savings by task consolidation and also does not hurt performance at sched_mc=0 where all power saving optimisations are turned off. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Balbir Singh Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 +++++++++++++ include/linux/topology.h | 6 ++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a96726658eca..5a933d925473 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -792,6 +792,19 @@ static inline int sd_balance_for_package_power(void) return 0; } +/* + * Optimise SD flags for power savings: + * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
+ * Keep default SD flags if sched_{smt,mc}_power_saving=0 + */ + +static inline int sd_power_saving_flags(void) +{ + if (sched_mc_power_savings | sched_smt_power_savings) + return SD_BALANCE_NEWIDLE; + + return 0; +} struct sched_group { struct sched_group *next; /* Must be a circular list */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 0ce7c0dac06c..e632d29f0544 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -125,7 +125,8 @@ int arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power(),\ + | sd_balance_for_mc_power()\ + | sd_power_saving_flags(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -150,7 +151,8 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | sd_balance_for_package_power(),\ + | sd_balance_for_package_power()\ + | sd_power_saving_flags(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } -- cgit v1.2.3 From 06aaf76a7e2e4cc57eabcb8f43ec99c961fe55fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Dec 2008 21:30:23 +0100 Subject: sched: move test_sd_parent() to an SMP section of sched.h Impact: build fix Signed-off-by: Ingo Molnar --- include/linux/sched.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a933d925473..e5f928a079e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -920,6 +920,15 @@ extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); +/* Test a flag in parent sched domain */ +static inline int test_sd_parent(struct sched_domain *sd, int flag) +{ + if (sd->parent && (sd->parent->flags & flag)) + return 1; + + return 0; +} + #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -1431,15 +1440,6 @@ struct 
task_struct { #endif }; -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH -- cgit v1.2.3 From 3f4b0ef7f2899c91b1d6958779f084b44dd59d32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:52:15 +0100 Subject: ACPI hibernate: Add a mechanism to save/restore ACPI NVS memory According to the ACPI Specification 3.0b, Section 15.3.2, "OSPM will call the _PTS control method some time before entering a sleeping state, to allow the platform's AML code to update this memory image before entering the sleeping state. After the system awakes from an S4 state, OSPM will restore this memory area and call the _WAK control method to enable the BIOS to reclaim its memory image." For this reason, implement a mechanism allowing us to save the NVS memory during hibernation and to restore it during the subsequent resume. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. 
Wysocki Acked-by: Nigel Cunningham Cc: Zhang Rui Signed-off-by: Len Brown --- include/linux/suspend.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2ce8207686e2..2b409c44db83 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct platform_hibernation_ops *ops); extern int hibernate(void); +extern int hibernate_nvs_register(unsigned long start, unsigned long size); +extern int hibernate_nvs_alloc(void); +extern void hibernate_nvs_free(void); +extern void hibernate_nvs_save(void); +extern void hibernate_nvs_restore(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} @@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } +static inline int hibernate_nvs_register(unsigned long a, unsigned long b) +{ + return 0; +} +static inline int hibernate_nvs_alloc(void) { return 0; } +static inline void hibernate_nvs_free(void) {} +static inline void hibernate_nvs_save(void) {} +static inline void hibernate_nvs_restore(void) {} #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Oct 2008 20:56:30 +0100 Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs On some machines it may be necessary to disable the saving/restoring of the ACPI NVS memory region during hibernation/resume. For this purpose, introduce new ACPI kernel command line option acpi_sleep=s4_nonvs. Based on a patch by Zhang Rui. Signed-off-by: Rafael J. 
Wysocki Acked-by: Nigel Cunningham Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..dfa0a5356c53 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); +void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ -- cgit v1.2.3 From 420e7fabd9c6d907280ed6b3e40eef425c5d8d8d Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Thu, 11 Dec 2008 22:04:19 +0100 Subject: nl80211: Add signal strength and bandwith to nl80211station info This patch adds signal strength and transmission bitrate to the station_info of nl80211. Signed-off-by: Henning Rogge Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 04d4516f9c71..7501acfcfdc4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -424,6 +424,32 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_rate_info - bitrate information + * + * These attribute types are used with %NL80211_STA_INFO_TXRATE + * when getting information about the bitrate of a station. 
+ * + * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved + * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) + * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate + * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @__NL80211_RATE_INFO_AFTER_LAST: internal use + */ +enum nl80211_rate_info { + __NL80211_RATE_INFO_INVALID, + NL80211_RATE_INFO_BITRATE, + NL80211_RATE_INFO_MCS, + NL80211_RATE_INFO_40_MHZ_WIDTH, + NL80211_RATE_INFO_SHORT_GI, + + /* keep last */ + __NL80211_RATE_INFO_AFTER_LAST, + NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 +}; + /** * enum nl80211_sta_info - station information * @@ -436,6 +462,9 @@ enum nl80211_sta_flags { * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute + * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute + * containing info as possible, see &enum nl80211_sta_info_txrate. */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -445,6 +474,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LLID, NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, + NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_TX_BITRATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 094d05dc32fc2930e381189a942016e5561775d9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 12 Dec 2008 11:57:43 +0530 Subject: mac80211: Fix HT channel selection HT management is done differently for AP and STA modes, unify to just the ->config() callback since HT is fundamentally a PHY property and cannot be per-BSS. Rename enum nl80211_sec_chan_offset as nl80211_channel_type to denote the channel type ( NO_HT, HT20, HT40+, HT40- ). 
Signed-off-by: Johannes Berg Signed-off-by: Sujith Signed-off-by: John W. Linville --- include/linux/nl80211.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7501acfcfdc4..e86ed59f9ad5 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -201,13 +201,13 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz - * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): - * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including * this attribute) - * NL80211_SEC_CHAN_DISABLED = HT20 only - * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel - * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel + * NL80211_CHAN_HT20 = HT20 only + * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel + * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -344,7 +344,7 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TXQ_PARAMS, NL80211_ATTR_WIPHY_FREQ, - NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, + NL80211_ATTR_WIPHY_CHANNEL_TYPE, /* add attributes here, update the policy in nl80211.c */ @@ -805,10 +805,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; -enum nl80211_sec_chan_offset { - NL80211_SEC_CHAN_NO_HT /* No HT */, - NL80211_SEC_CHAN_DISABLED /* HT20 only */, - NL80211_SEC_CHAN_BELOW /* HT40- */, - NL80211_SEC_CHAN_ABOVE /* HT40+ */ 
+enum nl80211_channel_type { + NL80211_CHAN_NO_HT, + NL80211_CHAN_HT20, + NL80211_CHAN_HT40MINUS, + NL80211_CHAN_HT40PLUS }; #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 12204e24b1330428c3062faee10a0d80b8a5cb61 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 19 Dec 2008 10:44:42 +1100 Subject: security: pass mount flags to security_sb_kern_mount() Pass mount flags to security_sb_kern_mount(), so security modules can determine if a mount operation is being performed by the kernel. Signed-off-by: James Morris Acked-by: Stephen Smalley --- include/linux/security.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 6423abf1ac0f..3416cb85e77b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1308,7 +1308,7 @@ struct security_operations { int (*sb_alloc_security) (struct super_block *sb); void (*sb_free_security) (struct super_block *sb); int (*sb_copy_data) (char *orig, char *copy); - int (*sb_kern_mount) (struct super_block *sb, void *data); + int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); int (*sb_show_options) (struct seq_file *m, struct super_block *sb); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct path *path, @@ -1575,7 +1575,7 @@ int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); -int security_sb_kern_mount(struct super_block *sb, void *data); +int security_sb_kern_mount(struct super_block *sb, int flags, void *data); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct path *path, @@ -1850,7 +1850,7 @@ static inline int security_sb_copy_data(char *orig, char *copy) return 0; } -static inline int 
security_sb_kern_mount(struct super_block *sb, void *data) +static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data) { return 0; } -- cgit v1.2.3 From 6bd9cd50c830eb88d571c492ec370a30bf999e15 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:26 -0800 Subject: x86: PAT: clarify is_linear_pfn_mapping() interface Impact: Documentation only Incremental patches to address the review comments from Nick Piggin for v3 version of x86 PAT pfnmap changes patchset here http://lkml.indiana.edu/hypermail/linux/kernel/0812.2/01330.html This patch: Clarify is_linear_pfn_mapping() and its usage. It is used by x86 PAT code for performance reasons. Identifying pfnmap as linear over entire vma helps speedup reserve and free of memtype for the region. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 87ecb40e11a0..35f811b0cd69 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,14 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +/* + * This interface is used by x86 PAT code to identify a pfn mapping that is + * linear over entire vma. This is to optimize PAT code that deals with + * marking the physical region with a particular prot. This is not for generic + * mm use. Note also that this check will not work if the pfn mapping is + * linear for a vma starting at physical address 0. In which case PAT code + * falls back to slow path of reserving physical range page by page. 
+ */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); -- cgit v1.2.3 From d87fe6607c31944f7572f965c1507ae77026c133 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:27 -0800 Subject: x86: PAT: modify follow_phys to return phys_addr prot and return value Impact: Changes and globalizes an existing static interface. Follow_phys does similar things as follow_pfnmap_pte. Make a minor change to follow_phys so that it can be used in place of follow_pfnmap_pte. Physical address return value with 0 as error return does not work in follow_phys as the actual physical address 0 mapping may exist in pte. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35f811b0cd69..2f6e2f886d4b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -804,6 +804,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); +int follow_phys(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); -- cgit v1.2.3 From 982d789ab76c8a11426852fec2fdf2f412e21c0c Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:28 -0800 Subject: x86: PAT: remove follow_pfnmap_pte in favor of follow_phys Impact: Cleanup - removes a new function in favor of a recently modified older one. Replace follow_pfnmap_pte in pat code with follow_phys. follow_phys also returns protection eliminating the need of pte_pgprot call.
Using follow_phys also eliminates the need for pte_pa. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f6e2f886d4b..36f9b3fa5e15 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1239,9 +1239,6 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ -int follow_pfnmap_pte(struct vm_area_struct *vma, - unsigned long address, pte_t *ret_ptep); - typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, -- cgit v1.2.3 From 34801ba9bf0381fcf0e2b08179d2c07f2c6ede74 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:29 -0800 Subject: x86: PAT: move track untrack pfnmap stubs to asm-generic Impact: Cleanup and branch hints only. Move the track and untrack pfn stub routines from memory.c to asm-generic. Also add unlikely to pfnmap related calls in fork and exit path. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- include/linux/mm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 36f9b3fa5e15..d3ddd735e375 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -163,12 +163,6 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } -extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, - unsigned long pfn, unsigned long size); -extern int track_pfn_vma_copy(struct vm_area_struct *vma); -extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); - /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask -- cgit v1.2.3 From bf53de907dfdaac178c92d774aae7370d7b97d20 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:10:24 +0100 Subject: x86, bts: add fork and exit handling Impact: introduce new ptrace facility Add arch_ptrace_untrace() function that is called when the tracer detaches (either voluntarily or when the tracing task dies); ptrace_disable() is only called on a voluntary detach. Add ptrace_fork() and arch_ptrace_fork(). They are called when a traced task is forked. Clear DS and BTS related fields on fork. Release DS resources and reclaim memory in ptrace_untrace(). This releases resources already when the tracing task dies. We used to do that when the traced task dies. 
Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- include/linux/ptrace.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 22641d5d45df..98b93ca4db06 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); +extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -313,6 +314,27 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_stop(code, info) do { } while (0) #endif +#ifndef arch_ptrace_untrace +/* + * Do machine-specific work before untracing child. + * + * This is called for a normal detach as well as from ptrace_exit() + * when the tracing task dies. + * + * Called with write_lock(&tasklist_lock) held. + */ +#define arch_ptrace_untrace(task) do { } while (0) +#endif + +#ifndef arch_ptrace_fork +/* + * Do machine-specific work to initialize a new task. + * + * This is called from copy_process(). + */ +#define arch_ptrace_fork(child, clone_flags) do { } while (0) +#endif + extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -- cgit v1.2.3 From c5dee6177f4bd2095aab7d9be9f6ebdddd6deee9 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:17:02 +0100 Subject: x86, bts: memory accounting Impact: move the BTS buffer accounting to the mlock bucket Add alloc_locked_buffer() and free_locked_buffer() functions to mm/mlock.c to kalloc a buffer and account the locked memory to current. Account the memory for the BTS buffer to the tracer. 
Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..9979d3fab6e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1286,5 +1286,7 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); +extern void *alloc_locked_buffer(size_t size); +extern void free_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 50b6f1f4a430608f7345f66ecd68a129bff11649 Mon Sep 17 00:00:00 2001 From: Kwangwoo Lee Date: Sat, 20 Dec 2008 04:26:01 -0500 Subject: Input: add tsc2007 based touchscreen driver This driver has been tested on ARM9 based SoC - MV86XX. Signed-off-by: Kwangwoo Lee Acked-by: Jean Delvare Signed-off-by: Dmitry Torokhov --- include/linux/i2c/tsc2007.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/i2c/tsc2007.h (limited to 'include/linux') diff --git a/include/linux/i2c/tsc2007.h b/include/linux/i2c/tsc2007.h new file mode 100644 index 000000000000..c6361fbb7bf9 --- /dev/null +++ b/include/linux/i2c/tsc2007.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_I2C_TSC2007_H +#define __LINUX_I2C_TSC2007_H + +/* linux/i2c/tsc2007.h */ + +struct tsc2007_platform_data { + u16 model; /* 2007. */ + u16 x_plate_ohms; + + int (*get_pendown_state)(void); + void (*clear_penirq)(void); /* If needed, clear 2nd level + interrupt source */ + int (*init_platform_hw)(void); + void (*exit_platform_hw)(void); +}; + +#endif -- cgit v1.2.3 From 749820928a2fd47ff536773d869d2c3f8038b7d1 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 5 Dec 2008 08:15:54 +0000 Subject: of/gpio: Implement of_gpio_count() This function is used to count how many GPIOs are specified for a device node.
Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- include/linux/of_gpio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index e25abf610cb6..fc2472c3c254 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -65,6 +65,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc) extern int of_get_gpio_flags(struct device_node *np, int index, enum of_gpio_flags *flags); +extern unsigned int of_gpio_count(struct device_node *np); extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); @@ -81,6 +82,11 @@ static inline int of_get_gpio_flags(struct device_node *np, int index, return -ENOSYS; } +static inline unsigned int of_gpio_count(struct device_node *np) +{ + return 0; +} + #endif /* CONFIG_OF_GPIO */ /** -- cgit v1.2.3 From 3ddeb912f41801fd1968c7880d031702a396e4d0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 20 Dec 2008 17:15:14 +0800 Subject: ftrace: enable format arguments checking Impact: broaden gcc printf format checks for ftrace_printk() format arguments checking for ftrace_printk() is __printf(1, 2), not __printf(1, 0). Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 04b52e6ebc66..677432b9cb7e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,7 +303,7 @@ extern void ftrace_dump(void); static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +ftrace_printk(const char *fmt, ...) 
__attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } -- cgit v1.2.3 From f4314e815e87b4ab1c9b1115dd5853cd20ca999c Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Sun, 21 Dec 2008 20:10:29 -0800 Subject: net: add DCNA attribute to the BCN interface for DCB Adds the Backward Congestion Notification Address (BCNA) attribute to the Backward Congestion Notification (BCN) interface for Data Center Bridging (DCB), which was missing. Receive the BCNA attribute in the ixgbe driver. The BCNA attribute is for a switch to inform the endstation about the physical port identification in order to support BCN on aggregated links. Signed-off-by: Don Skidmore Signed-off-by: Eric W Multanen Signed-off-by: Jeff Kirsher --- include/linux/dcbnl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index e73a61449ad6..b0ef274e0031 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -305,6 +305,8 @@ enum dcbnl_bcn_attrs{ DCB_BCN_ATTR_RP_7, DCB_BCN_ATTR_RP_ALL, + DCB_BCN_ATTR_BCNA_0, + DCB_BCN_ATTR_BCNA_1, DCB_BCN_ATTR_ALPHA, DCB_BCN_ATTR_BETA, DCB_BCN_ATTR_GD, -- cgit v1.2.3 From 209aa4fdc39eacc145a7f9c32a4b9ffcc68912c6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 12 Dec 2008 16:35:40 +0900 Subject: fb: SH-5 uses __raw I/O accessors now also, drop the special casing. 
Signed-off-by: Paul Mundt --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 75a81eaf3430..1ee63df5be92 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -888,7 +888,7 @@ struct fb_info { #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io -#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__) +#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3 From b8dd786f9417e5885929bfe33a235c76a9c1c569 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Mon, 22 Dec 2008 07:15:03 -0800 Subject: mlx4_core: Add support for multiple completion event vectors When using MSI-X mode, create a completion event queue for each CPU. Report the number of completion EQs in a new struct mlx4_caps member, num_comp_vectors, and extend the mlx4_cq_alloc() interface with a vector parameter so that consumers can specify which completion EQ should be used to report events for the CQ being created. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 371086fd946f..8f659cc29960 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -206,6 +206,7 @@ struct mlx4_caps { int reserved_cqs; int num_eqs; int reserved_eqs; + int num_comp_vectors; int num_mpts; int num_mtt_segs; int fmr_reserved_mtts; @@ -328,6 +329,7 @@ struct mlx4_cq { int arm_sn; int cqn; + unsigned vector; atomic_t refcount; struct completion free; @@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - int collapsed); + unsigned vector, int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); -- cgit v1.2.3 From a01777ecf227de735d7e525ecda48fe74b838a17 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 22 Dec 2008 18:30:29 +0000 Subject: uwb: remove unused include/linux/uwb/debug.h Signed-off-by: David Vrabel --- include/linux/uwb/debug.h | 82 ----------------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 include/linux/uwb/debug.h (limited to 'include/linux') diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h deleted file mode 100644 index 67a240527145..000000000000 --- a/include/linux/uwb/debug.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Ultra Wide Band - * Debug Support - * - * Copyright (C) 2005-2006 Intel Corporation - * Inaky Perez-Gonzalez - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - * - * - * FIXME: doc - * Invoke like: - * - * #define D_LOCAL 4 - * #include - * - * At the end of your include files. - */ -#include - -struct device; -extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize); - -/* Master debug switch; !0 enables, 0 disables */ -#define D_MASTER (!0) - -/* Local (per-file) debug switch; #define before #including */ -#ifndef D_LOCAL -#define D_LOCAL 0 -#endif - -#undef __d_printf -#undef d_fnstart -#undef d_fnend -#undef d_printf -#undef d_dump - -#define __d_printf(l, _tag, _dev, f, a...) \ -do { \ - struct device *__dev = (_dev); \ - if (D_MASTER && D_LOCAL >= (l)) { \ - char __head[64] = ""; \ - if (_dev != NULL) { \ - if ((unsigned long)__dev < 4096) \ - printk(KERN_ERR "E: Corrupt dev %p\n", \ - __dev); \ - else \ - snprintf(__head, sizeof(__head), \ - "%s %s: ", \ - dev_driver_string(__dev), \ - dev_name(__dev)); \ - } \ - printk(KERN_ERR "%s%s" _tag ": " f, __head, \ - __func__, ## a); \ - } \ -} while (0 && _dev) - -#define d_fnstart(l, _dev, f, a...) \ - __d_printf(l, " FNSTART", _dev, f, ## a) -#define d_fnend(l, _dev, f, a...) \ - __d_printf(l, " FNEND", _dev, f, ## a) -#define d_printf(l, _dev, f, a...) 
\ - __d_printf(l, "", _dev, f, ## a) -#define d_dump(l, _dev, ptr, size) \ -do { \ - struct device *__dev = _dev; \ - if (D_MASTER && D_LOCAL >= (l)) \ - dump_bytes(__dev, ptr, size); \ -} while (0 && _dev) -#define d_test(l) (D_MASTER && D_LOCAL >= (l)) -- cgit v1.2.3 From 908a7a16b852ffd618a9127be8d62432182d81b4 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Dec 2008 20:43:12 -0800 Subject: net: Remove unused netdev arg from some NAPI interfaces. When the napi api was changed to separate its 1:1 binding to the net_device struct, the netif_rx_[prep|schedule|complete] api failed to remove the now vestigial net_device structure parameter. This patch cleans up that api by properly removing it. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 58856b6737fb..41e1224651cf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1555,8 +1555,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) } /* Test if receive needs to be scheduled but only if up */ -static inline int netif_rx_schedule_prep(struct net_device *dev, - struct napi_struct *napi) +static inline int netif_rx_schedule_prep(struct napi_struct *napi) { return napi_schedule_prep(napi); } @@ -1564,27 +1563,24 @@ static inline int netif_rx_schedule_prep(struct net_device *dev, /* Add interface to tail of rx poll list. This assumes that _prep has * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) +static inline void __netif_rx_schedule(struct napi_struct *napi) { __napi_schedule(napi); } /* Try to reschedule poll. Called by irq handler. 
*/ -static inline void netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) +static inline void netif_rx_schedule(struct napi_struct *napi) { - if (netif_rx_schedule_prep(dev, napi)) - __netif_rx_schedule(dev, napi); + if (netif_rx_schedule_prep(napi)) + __netif_rx_schedule(napi); } /* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). */ -static inline int netif_rx_reschedule(struct net_device *dev, - struct napi_struct *napi) +static inline int netif_rx_reschedule(struct napi_struct *napi) { if (napi_schedule_prep(napi)) { - __netif_rx_schedule(dev, napi); + __netif_rx_schedule(napi); return 1; } return 0; @@ -1593,8 +1589,7 @@ static inline int netif_rx_reschedule(struct net_device *dev, /* same as netif_rx_complete, except that local_irq_save(flags) * has already been issued */ -static inline void __netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) +static inline void __netif_rx_complete(struct napi_struct *napi) { __napi_complete(napi); } @@ -1604,8 +1599,7 @@ static inline void __netif_rx_complete(struct net_device *dev, * it completes the work. The device cannot be out of poll list at this * moment, it is BUG(). 
*/ -static inline void netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) +static inline void netif_rx_complete(struct napi_struct *napi) { napi_complete(napi); } -- cgit v1.2.3 From 160bbab3000dafccbe43688e48208cecf4deb879 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 23 Dec 2008 10:00:14 +0000 Subject: [MTD] struct device - replace bus_id with dev_name(), dev_set_name() Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: David Woodhouse --- include/linux/mtd/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c02f3d264ecf..e80c674daeb3 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -13,7 +13,7 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ - char *name); /* name for the new device */ + const char *name); /* name for the new device */ void mtd_concat_destroy(struct mtd_info *mtd); -- cgit v1.2.3 From 88a9fe8cae3bb52e82489447f45e8d7ba1409ca8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:31 -0500 Subject: SUNRPC: Remove the last remnant of the BKL... Somehow, this escaped the previous purge. There should be no need to keep any extra locks in the XDR callbacks. The NFS client XDR code only writes into private objects, whereas all reads of shared objects are confined to fields that do not change, such as filehandles... Ditto for lockd, the NFSv2/v3 client mount code, and rpcbind. The nfsd XDR code may require the BKL, but since it does a synchronous RPC call from a thread that already holds the lock, that issue is moot. 
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e4057d729f03..49e1eb454465 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -36,21 +36,6 @@ struct xdr_netobj { */ typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); -/* - * We're still requiring the BKL in the xdr code until it's been - * more carefully audited, at which point this wrapper will become - * unnecessary. - */ -static inline int rpc_call_xdrproc(kxdrproc_t xdrproc, void *rqstp, __be32 *data, void *obj) -{ - int ret; - - lock_kernel(); - ret = xdrproc(rqstp, data, obj); - unlock_kernel(); - return ret; -} - /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 146ec944bbd31d241a44a00518b054fb01921d22 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:34 -0500 Subject: NFS: Move declaration of nfs_mount() to fs/nfs/internal.h Clean up: The nfs_mount() function is not to be used outside of the NFS client. Move its public declaration to fs/nfs/internal.h. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4eaa8347a0d9..f11077285a6c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -532,12 +532,6 @@ static inline void nfs3_forget_cached_acls(struct inode *inode) } #endif /* CONFIG_NFS_V3_ACL */ -/* - * linux/fs/mount_clnt.c - */ -extern int nfs_mount(struct sockaddr *, size_t, char *, char *, - int, int, struct nfs_fh *); - /* * inline functions */ -- cgit v1.2.3 From d740351bf0960e89ce1aef45cfe00167cb0f9e5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:37 -0500 Subject: NFS: add "[no]resvport" mount option The standard default security setting for NFS is AUTH_SYS. An NFS client connects to NFS servers via a privileged source port and a fixed standard destination port (2049). The client sends raw uid and gid numbers to identify users making NFS requests, and the server assumes an appropriate authority on the client has vetted these values because the source port is privileged. On Linux, by default in-kernel RPC services use a privileged port in the range between 650 and 1023 to avoid using source ports of well- known IP services. Using such a small range limits the number of NFS mount points and the number of unique NFS servers to which a client can connect concurrently. An NFS client can use unprivileged source ports to expand the range of source port numbers, allowing more concurrent server connections and more NFS mount points. Servers must explicitly allow NFS connections from unprivileged ports for this to work. In the past, bumping the value of the sunrpc.max_resvport sysctl on the client would permit the NFS client to use unprivileged ports. Bumping this setting also changes the maximum port number used by other in-kernel RPC services, some of which still required a port number less than 1023. 
This is exacerbated by the way source port numbers are chosen by the Linux RPC client, which starts at the top of the range and works downwards. As a result, bumping the maximum means all RPC services requesting a source port will likely get an unprivileged port instead of a privileged one. Changing this setting affects all NFS mount points on a client. A sysadmin could not selectively choose which mount points would use non-privileged ports and which could not. Lastly, this mechanism of expanding the limit on the number of NFS mount points was entirely undocumented. To address the need for the NFS client to use a large range of source ports without interfering with the activity of other in-kernel RPC services, we introduce a new NFS mount option. This option explicitly tells only the NFS client to use a non-privileged source port when communicating with the NFS server for one specific mount point. This new mount option is called "resvport," like the similar NFS mount option on FreeBSD and Mac OS X. A sister patch for nfs-utils will be submitted that documents this new option in nfs(5). The default setting for this new mount option requires the NFS client to use a privileged port, as before. Explicitly specifying the "noresvport" mount option allows the NFS client to use an unprivileged source port for this mount point when connecting to the NFS server port. This mount option is supported only for text-based NFS mounts. [ Sidebar: it is widely known that security mechanisms based on the use of privileged source ports are ineffective. However, the NFS client can combine the use of unprivileged ports with the use of secure authentication mechanisms, such as Kerberos. This allows a large number of connections and mount points while ensuring a useful level of security. Eventually we may change the default setting for this option depending on the security flavor used for the mount. 
For example, if the mount is using only AUTH_SYS, then the default setting will be "resvport;" if the mount is using a strong security flavor such as krb5, the default setting will be "noresvport." ] Signed-off-by: Chuck Lever [Trond.Myklebust@netapp.com: Fixed a bug whereby nfs4_init_client() was being called with incorrect arguments.] Signed-off-by: Trond Myklebust --- include/linux/nfs_mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 6549a06ac16e..4499016e6d0d 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -45,7 +45,7 @@ struct nfs_mount_data { char context[NFS_MAX_CONTEXT_LEN + 1]; /* 6 */ }; -/* bits in the flags field */ +/* bits in the flags field visible to user space */ #define NFS_MOUNT_SOFT 0x0001 /* 1 */ #define NFS_MOUNT_INTR 0x0002 /* 1 */ /* now unused, but ABI */ @@ -68,5 +68,6 @@ struct nfs_mount_data { /* The following are for internal use only */ #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 #define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 +#define NFS_MOUNT_NORESVPORT 0x40000 #endif -- cgit v1.2.3 From 0cb2659b818eca99235e17c04291cfa9985c14f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:38 -0500 Subject: NLM: allow lockd requests from an unprivileged port If the admin has specified the "noresvport" option for an NFS mount point, the kernel's NFS client uses an unprivileged source port for the main NFS transport. The kernel's lockd client should use an unprivileged port in this case as well. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/lockd/bind.h | 1 + include/linux/lockd/lockd.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index e5872dc994c0..fbc48f898521 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -41,6 +41,7 @@ struct nlmclnt_initdata { size_t addrlen; unsigned short protocol; u32 nfs_version; + int noresvport; }; /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b56d5aa9b194..23da3fa69efa 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -49,6 +49,7 @@ struct nlm_host { unsigned short h_proto; /* transport proto */ unsigned short h_reclaiming : 1, h_server : 1, /* server side, not client side */ + h_noresvport : 1, h_inuse : 1; wait_queue_head_t h_gracewait; /* wait while reclaiming */ struct rw_semaphore h_rwsem; /* Reboot recovery lock */ @@ -220,7 +221,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const size_t salen, const unsigned short protocol, const u32 version, - const char *hostname); + const char *hostname, + int noresvport); struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const char *hostname, const size_t hostname_len); -- cgit v1.2.3 From 95d35cb4c473c754824967c0b069bbeb7efa4847 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:45 -0500 Subject: NFSv4: Remove nfs_client->cl_sem Now that we're using the flags to indicate state that needs to be recovered, as well as having implemented proper refcounting and spinlocking on the state and open_owners, we can get rid of nfs_client->cl_sem. The only remaining case that was dubious was the file locking, and that case is now covered by the nfsi->rwsem. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4e477ae58699..9bb81aec91cf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -42,12 +42,6 @@ struct nfs_client { struct rb_root cl_openowner_id; struct rb_root cl_lockowner_id; - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - struct list_head cl_delegations; struct rb_root cl_state_owners; spinlock_t cl_lock; -- cgit v1.2.3 From bd7bf9d540c001055fba796ebf146d90e4dd2eb2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:53 -0500 Subject: NFSv4: Convert delegation->type field to fmode_t Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/nfs_xdr.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f11077285a6c..8d71d7b7c78a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -180,7 +180,7 @@ struct nfs_inode { /* NFSv4 state */ struct list_head open_states; struct nfs_delegation *delegation; - int delegation_state; + fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ struct inode vfs_inode; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c1c31acb8a2b..32c1a0ecdbff 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -126,7 +126,7 @@ struct nfs_openargs { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ - int delegation_type; /* CLAIM_PREVIOUS */ + fmode_t delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ @@ -143,7 +143,7 @@ struct 
nfs_openres { struct nfs_fattr * dir_attr; struct nfs_seqid * seqid; const struct nfs_server *server; - int delegation_type; + fmode_t delegation_type; nfs4_stateid delegation; __u32 do_recall; __u64 maxsize; -- cgit v1.2.3 From dc0b027dfadfcb8a5504f7d8052754bf8d501ab9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:56 -0500 Subject: NFSv4: Convert the open and close ops to use fmode Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8d71d7b7c78a..b8d9c6dd4f63 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,7 +83,7 @@ struct nfs_open_context { struct rpc_cred *cred; struct nfs4_state *state; fl_owner_t lockowner; - int mode; + fmode_t mode; unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) @@ -342,7 +342,7 @@ extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); +extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode); extern u64 nfs_compat_user_ino64(u64 fileid); extern void nfs_fattr_init(struct nfs_fattr *fattr); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 32c1a0ecdbff..a550b528319f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -120,6 +120,7 @@ struct nfs_openargs { const struct nfs_fh * fh; struct nfs_seqid * seqid; int open_flags; + fmode_t fmode; __u64 clientid; __u64 id; union { @@ -171,7 +172,7 @@ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid * stateid; struct 
nfs_seqid * seqid; - int open_flags; + fmode_t fmode; const u32 * bitmask; }; -- cgit v1.2.3 From 64672d55d93c26fb4035fd1a84a803cbc09cb058 Mon Sep 17 00:00:00 2001 From: Peter Staubach Date: Tue, 23 Dec 2008 15:21:56 -0500 Subject: optimize attribute timeouts for "noac" and "actimeo=0" Hi. I've been looking at a bugzilla which describes a problem where a customer was advised to use either the "noac" or "actimeo=0" mount options to solve a consistency problem that they were seeing in the file attributes. It turned out that this solution did not work reliably for them because sometimes, the local attribute cache was believed to be valid and not timed out. (With an attribute cache timeout of 0, the cache should always appear to be timed out.) In looking at this situation, it appears to me that the problem is that the attribute cache timeout code has an off-by-one error in it. It is assuming that the cache is valid in the region, [read_cache_jiffies, read_cache_jiffies + attrtimeo]. The cache should be considered valid only in the region, [read_cache_jiffies, read_cache_jiffies + attrtimeo). With this change, the options, "noac" and "actimeo=0", work as originally expected. This problem was previously addressed by special casing the attrtimeo == 0 case. However, since the problem is only an off-by-one error, the cleaner solution is to address the off-by-one error and thus not require the special case. Thanx... 
ps Signed-off-by: Peter Staubach Signed-off-by: Trond Myklebust --- include/linux/jiffies.h | 10 ++++++++++ include/linux/nfs_fs.h | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index abb6ac639e8e..1a9cf78bfce5 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -115,10 +115,20 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +/* + * Calculate whether a is in the range of [b, c]. + */ #define time_in_range(a,b,c) \ (time_after_eq(a,b) && \ time_before_eq(a,c)) +/* + * Calculate whether a is in the range of [b, c). + */ +#define time_in_range_open(a,b,c) \ + (time_after_eq(a,b) && \ + time_before(a,c)) + /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64() */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b8d9c6dd4f63..db867b04ac3c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -130,7 +130,10 @@ struct nfs_inode { * * We need to revalidate the cached attrs for this inode if * - * jiffies - read_cache_jiffies > attrtimeo + * jiffies - read_cache_jiffies >= attrtimeo + * + * Please note the comparison is greater than or equal + * so that zero timeout values can be specified. */ unsigned long read_cache_jiffies; unsigned long attrtimeo; -- cgit v1.2.3 From c977a2ef40a38c45537ad03823d0a004f06373f0 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 23 Dec 2008 16:06:13 -0500 Subject: sunrpc: get rid of rpc_rqst.rq_bufsize rq_bufsize is not used. 
Signed-off-by: Mike Sager Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4d80a118d538..11fc71d50c1e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -76,8 +76,7 @@ struct rpc_rqst { struct list_head rq_list; __u32 * rq_buffer; /* XDR encode buffer */ - size_t rq_bufsize, - rq_callsize, + size_t rq_callsize, rq_rcvsize; struct xdr_buf rq_private_buf; /* The receive buffer -- cgit v1.2.3 From c381060869317b3c84430d4f54965d409cbfe65f Mon Sep 17 00:00:00 2001 From: J. Bruce Fields Date: Tue, 23 Dec 2008 16:08:32 -0500 Subject: rpc: add an rpc_pipe_open method We want to transition to a new gssd upcall which is text-based and more easily extensible. To simplify upgrades, as well as testing and debugging, it will help if we can upgrade gssd (to a version which understands the new upcall) without having to choose at boot (or module-load) time whether we want the new or the old upcall. We will do this by providing two different pipes: one named, as currently, after the mechanism (normally "krb5"), and supporting the old upcall, and one named "gssd" and supporting the new upcall version. We allow gssd to indicate which version it supports by its choice of which pipe to open. As we have no interest in supporting *simultaneous* use of both versions, we'll forbid opening both pipes at the same time. So, add a new pipe_open callback to the rpc_pipefs api, which the gss code can use to track which pipes have been opened, and to refuse opens of incompatible pipes. We only need this to be called on the first open of a given pipe. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 51b977a4ca20..cea764c2359f 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -15,6 +15,7 @@ struct rpc_pipe_ops { ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t); ssize_t (*downcall)(struct file *, const char __user *, size_t); void (*release_pipe)(struct inode *); + int (*open_pipe)(struct inode *); void (*destroy_msg)(struct rpc_pipe_msg *); }; -- cgit v1.2.3 From 68e76ad0baf8f5d5060377c2423ee6eed5c63057 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:17:15 -0500 Subject: nfsd: pass client principal name in rsc downcall Two principals are involved in krb5 authentication: the target, who we authenticate *to* (normally the name of the server, like nfs/server.citi.umich.edu@CITI.UMICH.EDU), and the source, who we authenticate *as* (normally a user, like bfields@UMICH.EDU). In the case of NFSv4 callbacks, the target of the callback should be the source of the client's setclientid call, and the source should be the nfs server's own principal. Therefore we allow svcgssd to pass down the name of the principal that just authenticated, so that on setclientid we can store that principal name with the new client, to be used later on callbacks. Signed-off-by: Olga Kornievskaia Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/nfsd/state.h | 1 + include/linux/sunrpc/svcauth_gss.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index d0fe2e378452..ce7cbf4b7c93 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -124,6 +124,7 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ __be32 cl_addr; /* client ipaddress */ + char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index c9165d9771a8..ca7d725861fc 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -20,6 +20,7 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); +char *svc_gss_principal(struct svc_rqst *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ -- cgit v1.2.3 From 608207e8884e083ad8b8d33eda868da70f0d63e8 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:17:40 -0500 Subject: rpc: pass target name down to rpc level on callbacks The rpc client needs to know the principal that the setclientid was done as, so it can tell gssd who to authenticate to. Signed-off-by: Olga Kornievskaia Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6f0ee1b84a4f..c39a21040dcb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -58,6 +58,7 @@ struct rpc_clnt { struct rpc_timeout cl_timeout_default; struct rpc_program * cl_program; char cl_inline_name[32]; + char *cl_principal; /* target to authenticate to */ }; /* @@ -108,6 +109,7 @@ struct rpc_create_args { u32 version; rpc_authflavor_t authflavor; unsigned long flags; + char *client_name; }; /* Values for "flags" field */ -- cgit v1.2.3 From 61054b14d545e257b9415d5ca0cd5f43762b4d0c Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:19:00 -0500 Subject: nfsd: support callbacks with gss flavors This patch adds server-side support for callbacks other than AUTH_SYS. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/nfsd/state.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index ce7cbf4b7c93..128298c0362d 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -124,6 +124,7 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ __be32 cl_addr; /* client ipaddress */ + u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ -- cgit v1.2.3 From 4a7794860ba2b56693b1d89fd485fd08cdc763e3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Sep 2008 18:19:03 -0700 Subject: crypto: api - Move type exit function into crypto_tfm The type exit function needs to undo any allocations done by the type init function. 
However, the type init function may differ depending on the upper-level type of the transform (e.g., a crypto_blkcipher instantiated as a crypto_ablkcipher). So we need to move the exit function out of the lower-level structure and into crypto_tfm itself. As it stands this is a no-op since nobody uses exit functions at all. However, all cases where a lower-level type is instantiated as a different upper-level type (such as blkcipher as ablkcipher) will be converted such that they allocate the underlying transform and use that instead of casting (e.g., crypto_ablkcipher casted into crypto_blkcipher). That will need to use a different exit function depending on the upper-level type. This patch also allows the type init/exit functions to call (or not) cra_init/cra_exit instead of always calling them from the top level. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3d2317e4af2e..ea52cd944fd9 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -480,6 +480,8 @@ struct crypto_tfm { struct compress_tfm compress; struct rng_tfm rng; } crt_u; + + void (*exit)(struct crypto_tfm *tfm); struct crypto_alg *__crt_alg; -- cgit v1.2.3 From 7b0bac64cd5b74d6f1147524c26216de13a501fd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Sep 2008 06:52:53 +0900 Subject: crypto: api - Rebirth of crypto_alloc_tfm This patch reintroduces a completely revamped crypto_alloc_tfm. The biggest change is that we now take two crypto_type objects when allocating a tfm, a frontend and a backend. In fact this simply formalises what we've been doing behind the API's back. For example, as it stands crypto_alloc_ahash may use an actual ahash algorithm or a crypto_hash algorithm. Putting this in the API allows us to do this much more cleanly. The existing types will be converted across gradually. 
Signed-off-by: Herbert Xu --- include/linux/crypto.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ea52cd944fd9..ffaaa418cf59 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -546,7 +546,9 @@ struct crypto_attr_u32 { * Transform user interface. */ -struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags); +struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, + const struct crypto_type *frontend, + u32 type, u32 mask); struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_free_tfm(struct crypto_tfm *tfm); -- cgit v1.2.3 From 7b5a080b3c46f0cac71c0d0262634c6517d4ee4f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 31 Aug 2008 15:47:27 +1000 Subject: crypto: hash - Add shash interface The shash interface replaces the current synchronous hash interface. It improves over hash in two ways. Firstly shash is reentrant, meaning that the same tfm may be used by two threads simultaneously as all hashing state is stored in a local descriptor. The other enhancement is that shash no longer takes scatter list entries. This is because shash is specifically designed for synchronous algorithms and as such scatter lists are unnecessary. All existing hash users will be converted to shash once the algorithms have been completely converted. There is also a new finup function that combines update with final. This will be extended to ahash once the algorithm conversion is done. This is also the first time that an algorithm type has their own registration function. Existing algorithm types will be converted to this way in due course. 
Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ffaaa418cf59..ee95c748695c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -39,6 +39,7 @@ #define CRYPTO_ALG_TYPE_HASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_RNG 0x0000000c +#define CRYPTO_ALG_TYPE_SHASH 0x0000000d #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c -- cgit v1.2.3 From 3b2f6df08258e2875f42bd630eece7e7241a053b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 31 Aug 2008 18:52:18 +1000 Subject: crypto: hash - Export shash through ahash This patch allows shash algorithms to be used through the ahash interface. This is required before we can convert digest algorithms over to shash. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ee95c748695c..44c72f0f9b05 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -38,8 +38,8 @@ #define CRYPTO_ALG_TYPE_DIGEST 0x00000008 #define CRYPTO_ALG_TYPE_HASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a +#define CRYPTO_ALG_TYPE_SHASH 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c -#define CRYPTO_ALG_TYPE_SHASH 0x0000000d #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c -- cgit v1.2.3 From dec8b78606ebd5f309c38f2fb10196ce996dd18d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Nov 2008 21:38:11 +0800 Subject: crypto: hash - Add import/export interface It is often useful to save the partial state of a hash function so that it can be used as a base for two or more computations. The most prominent example is HMAC where all hashes start from a base determined by the key. 
Having an import/export interface means that we only have to compute that base once rather than for each message. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 44c72f0f9b05..77a1f3d9416d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -221,6 +221,7 @@ struct ablkcipher_alg { struct ahash_alg { int (*init)(struct ahash_request *req); + int (*reinit)(struct ahash_request *req); int (*update)(struct ahash_request *req); int (*final)(struct ahash_request *req); int (*digest)(struct ahash_request *req); -- cgit v1.2.3 From 5f7082ed4f482f05db01d84dbf58190492ebf0ad Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 31 Aug 2008 22:21:09 +1000 Subject: crypto: hash - Export shash through hash This patch allows shash algorithms to be used through the old hash interface. This is a transitional measure so we can convert the underlying algorithms to shash before converting the users across. 
Signed-off-by: Herbert Xu --- include/linux/crypto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 77a1f3d9416d..3bacd71509fb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -36,9 +36,9 @@ #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_DIGEST 0x00000008 -#define CRYPTO_ALG_TYPE_HASH 0x00000009 +#define CRYPTO_ALG_TYPE_HASH 0x00000008 +#define CRYPTO_ALG_TYPE_SHASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a -#define CRYPTO_ALG_TYPE_SHASH 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -- cgit v1.2.3 From 69c35efcf1576ab5f00cba83e8ca740923afb6c9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Nov 2008 15:11:47 +0800 Subject: libcrc32c: Move implementation to crypto crc32c This patch swaps the role of libcrc32c and crc32c. Previously the implementation was in libcrc32c and crc32c was a wrapper. Now the code is in crc32c and libcrc32c just calls the crypto layer. The reason for the change is to tap into the algorithm selection capability of the crypto API so that optimised implementations such as the one utilising Intel's CRC32C instruction can be used where available. 
Signed-off-by: Herbert Xu --- include/linux/crc32c.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 508f512e5a2f..66fa8ff795ec 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -3,9 +3,6 @@ #include -extern u32 crc32c_le(u32 crc, unsigned char const *address, size_t length); -extern u32 crc32c_be(u32 crc, unsigned char const *address, size_t length); - -#define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length) +extern u32 crc32c(u32 crc, const void *address, unsigned int length); #endif /* _LINUX_CRC32C_H */ -- cgit v1.2.3 From 0426c166424ea6d3d0412f47879c8ba268f874c4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 11 Nov 2008 12:20:06 +0800 Subject: libcrc32c: Add crc32c_le macro The bnx2x driver actually uses the crc32c_le name so this patch restores the crc32c_le symbol through a macro. Signed-off-by: Herbert Xu --- include/linux/crc32c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 66fa8ff795ec..bd8b44d96bdc 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -5,4 +5,7 @@ extern u32 crc32c(u32 crc, const void *address, unsigned int length); +/* This macro exists for backwards-compatibility. */ +#define crc32c_le crc32c + #endif /* _LINUX_CRC32C_H */ -- cgit v1.2.3 From f9af0e70911e9d6cc9a68f784dca86415486084d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 26 Dec 2008 12:24:24 +0900 Subject: irq: for_each_irq_desc() move to irqnr.h Impact: cleanup before CONFIG_SPARSE_IRQ age, for_each_irq_desc() sat in irqnr.h and could be called from generic code. CONFIG_SPARSE_IRQ breaks this assumption, but SPARSE_IRQ version for_each_irq_desc() also can move into irqnr.h easily. Also, this patch unifies CONFIG_SPARSE_IRQ and !CONFIG_SPARSE_IRQ for_each_irq_desc(). 
Signed-off-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar --- include/linux/irq.h | 24 ++++-------------------- include/linux/irqnr.h | 19 +++++++++---------- 2 files changed, 13 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 98564dc64476..69da275c0ebd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -202,33 +202,17 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; - -static inline struct irq_desc *irq_to_desc(unsigned int irq) -{ - return (irq < NR_IRQS) ? irq_desc + irq : NULL; -} -static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) -{ - return irq_to_desc(irq); -} - -#else - -extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +#else /* CONFIG_SPARSE_IRQ */ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) - #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) #define kstat_incr_irqs_this_cpu(irqno, DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]++) -#endif +#endif /* CONFIG_SPARSE_IRQ */ + +extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); static inline struct irq_desc * irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 95d2b74641f5..c4a59c7a478b 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -15,20 +15,19 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) -#else +#else /* CONFIG_GENERIC_HARDIRQS */ 
extern int nr_irqs; +extern struct irq_desc *irq_to_desc(unsigned int irq); -#ifndef CONFIG_SPARSE_IRQ +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ + irq++, desc = irq_to_desc(irq)) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ + irq--, desc = irq_to_desc(irq)) -struct irq_desc; -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) -#endif -#endif +#endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ for (irq = 0; irq < nr_irqs; irq++) -- cgit v1.2.3 From 18eefedfe8ad33e8fc7614c13359e29a9fab4644 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 26 Dec 2008 12:29:48 +0900 Subject: irq: simplify for_each_irq_desc() usage Impact: cleanup all for_each_irq_desc() usage point have !desc check. then its check can move into for_each_irq_desc() macro. 
Signed-off-by: KOSAKI Motohiro Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irqnr.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index c4a59c7a478b..5504a5c97836 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -22,10 +22,14 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ - irq++, desc = irq_to_desc(irq)) + irq++, desc = irq_to_desc(irq)) \ + if (desc) + + # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ - irq--, desc = irq_to_desc(irq)) + irq--, desc = irq_to_desc(irq)) \ + if (desc) #endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3 From 13a0c3c269b223f60abfac8a9811d77111a8b4ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 26 Dec 2008 02:05:47 -0800 Subject: sparseirq: work around compiler optimizing away __weak functions Impact: fix panic on null pointer with sparseirq Some GCC versions seem to inline the weak global function, when that function is empty. Work it around, by making the functions return a (dummy) integer. 
Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- include/linux/irq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 69da275c0ebd..0e40af4bac40 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -193,9 +193,9 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern void early_irq_init(void); -extern void arch_early_irq_init(void); -extern void arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int early_irq_init(void); +extern int arch_early_irq_init(void); +extern int arch_init_chip_data(struct irq_desc *desc, int cpu); extern void arch_init_copy_chip_data(struct irq_desc *old_desc, struct irq_desc *desc, int cpu); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); -- cgit v1.2.3 From 70a7d3cc1308a55104fbe505d76f2aca8a4cf53e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 22 Dec 2008 10:26:05 -0800 Subject: swiotlb: add hwdev to swiotlb_phys_to_bus() / swiotlb_sg_to_bus() Impact: extend functions with a (yet unused) parameter, update callsites Some architectures need it - in preparation for highmem swiotlb. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 325af1de0351..dedd3c0cfe30 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -27,7 +27,8 @@ swiotlb_init(void); extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); -extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); +extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, + phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); -- cgit v1.2.3 From 1eca4365be25c540650693e941bc06a66cf38f94 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 20:03:17 +0900 Subject: libata: beef up iterators There currently are the following looping constructs. * __ata_port_for_each_link() for all available links * ata_port_for_each_link() for edge links * ata_link_for_each_dev() for all devices * ata_link_for_each_dev_reverse() for all devices in reverse order Now there's a need for looping construct which is similar to __ata_port_for_each_link() but iterates over PMP links before the host link. Instead of adding another one with long name, do the following cleanup. * Implement and export ata_link_next() and ata_dev_next() which take @mode parameter and can be used to build custom loop. * Implement ata_for_each_link() and ata_for_each_dev() which take looping mode explicitly. The following iteration modes are implemented. 
* ATA_LITER_EDGE : loop over edge links * ATA_LITER_HOST_FIRST : loop over all links, host link first * ATA_LITER_PMP_FIRST : loop over all links, PMP links first * ATA_DITER_ENABLED : loop over enabled devices * ATA_DITER_ENABLED_REVERSE : loop over enabled devices in reverse order * ATA_DITER_ALL : loop over all devices * ATA_DITER_ALL_REVERSE : loop over all devices in reverse order This change removes explicit device enabledness checks from many loops and makes it clear which ones are iterated over in which direction. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 76 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index ed3f26eb5df1..3b2a0c6444ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1285,26 +1285,62 @@ static inline int ata_link_active(struct ata_link *link) return ata_tag_valid(link->active_tag) || link->sactive; } -extern struct ata_link *__ata_port_next_link(struct ata_port *ap, - struct ata_link *link, - bool dev_only); - -#define __ata_port_for_each_link(link, ap) \ - for ((link) = __ata_port_next_link((ap), NULL, false); (link); \ - (link) = __ata_port_next_link((ap), (link), false)) - -#define ata_port_for_each_link(link, ap) \ - for ((link) = __ata_port_next_link((ap), NULL, true); (link); \ - (link) = __ata_port_next_link((ap), (link), true)) - -#define ata_link_for_each_dev(dev, link) \ - for ((dev) = (link)->device; \ - (dev) < (link)->device + ata_link_max_devices(link) || ((dev) = NULL); \ - (dev)++) - -#define ata_link_for_each_dev_reverse(dev, link) \ - for ((dev) = (link)->device + ata_link_max_devices(link) - 1; \ - (dev) >= (link)->device || ((dev) = NULL); (dev)--) +/* + * Iterators + * + * ATA_LITER_* constants are used to select link iteration mode and + * ATA_DITER_* device iteration mode. 
+ * + * For a custom iteration directly using ata_{link|dev}_next(), if + * @link or @dev, respectively, is NULL, the first element is + * returned. @dev and @link can be any valid device or link and the + * next element according to the iteration mode will be returned. + * After the last element, NULL is returned. + */ +enum ata_link_iter_mode { + ATA_LITER_EDGE, /* if present, PMP links only; otherwise, + * host link. no slave link */ + ATA_LITER_HOST_FIRST, /* host link followed by PMP or slave links */ + ATA_LITER_PMP_FIRST, /* PMP links followed by host link, + * slave link still comes after host link */ +}; + +enum ata_dev_iter_mode { + ATA_DITER_ENABLED, + ATA_DITER_ENABLED_REVERSE, + ATA_DITER_ALL, + ATA_DITER_ALL_REVERSE, +}; + +extern struct ata_link *ata_link_next(struct ata_link *link, + struct ata_port *ap, + enum ata_link_iter_mode mode); + +extern struct ata_device *ata_dev_next(struct ata_device *dev, + struct ata_link *link, + enum ata_dev_iter_mode mode); + +/* + * Shortcut notation for iterations + * + * ata_for_each_link() iterates over each link of @ap according to + * @mode. @link points to the current link in the loop. @link is + * NULL after loop termination. ata_for_each_dev() works the same way + * except that it iterates over each device of @link. + * + * Note that the mode prefixes ATA_{L|D}ITER_ shouldn't need to be + * specified when using the following shorthand notations. Only the + * mode itself (EDGE, HOST_FIRST, ENABLED, etc...) should be + * specified. This not only increases brevity but also makes it + * impossible to use ATA_LITER_* for device iteration or vice-versa. 
+ */ +#define ata_for_each_link(link, ap, mode) \ + for ((link) = ata_link_next(NULL, (ap), ATA_LITER_##mode); (link); \ + (link) = ata_link_next((link), (ap), ATA_LITER_##mode)) + +#define ata_for_each_dev(dev, link, mode) \ + for ((dev) = ata_dev_next(NULL, (link), ATA_DITER_##mode); (dev); \ + (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode)) /** * ata_ncq_enabled - Test whether NCQ is enabled -- cgit v1.2.3 From ece180d1cfe5fa751eaa85bf796cf28b2150af15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 20:04:37 +0900 Subject: libata: perform port detach in EH ata_port_detach() first made sure EH saw ATA_PFLAG_UNLOADING and then assumed EH context belongs to it and performed detach operation itself. However, UNLOADING doesn't disable all of EH and this could lead to problems including triggering WARN_ON()'s in EH path. This patch makes port detach behave more like other EH actions such that ata_port_detach() requests EH to detach and waits for completion. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b2a0c6444ee..3449de597eff 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -213,10 +213,11 @@ enum { ATA_PFLAG_FROZEN = (1 << 2), /* port is frozen */ ATA_PFLAG_RECOVERED = (1 << 3), /* recovery action performed */ ATA_PFLAG_LOADING = (1 << 4), /* boot/loading probe */ - ATA_PFLAG_UNLOADING = (1 << 5), /* module is unloading */ ATA_PFLAG_SCSI_HOTPLUG = (1 << 6), /* SCSI hotplug scheduled */ ATA_PFLAG_INITIALIZING = (1 << 7), /* being initialized, don't touch */ ATA_PFLAG_RESETTING = (1 << 8), /* reset in progress */ + ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ + ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM 
operation pending */ -- cgit v1.2.3 From 88e740f1654bf28565edd528a060695c1f2b5ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Mon, 27 Oct 2008 18:44:46 +0900 Subject: block: add queue flag for paravirt frontend drivers As is the case with SSD devices, we do not want to idle in AS/CFQ when the block device is a paravirt front-end driver. This patch adds a flag (QUEUE_FLAG_VIRT) which should be used by front-end drivers such as virtio_blk and xen-blkfront to indicate a paravirtualized device. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 031a315c0509..482e9600f7a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -449,6 +449,7 @@ struct request_queue #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.3 From 08bafc0341f2f7920e9045bc32c40299cac8c21b Mon Sep 17 00:00:00 2001 From: Keith Mannthey Date: Tue, 25 Nov 2008 10:24:35 +0100 Subject: block: Supress Buffer I/O errors when SCSI REQ_QUIET flag set Allow the scsi request REQ_QUIET flag to be propagated to the buffer file system layer. The basic idea is to pass the flag from the scsi request to the bio (block IO) and then to the buffer layer. The buffer layer can then suppress needless printks. This patch declutters the kernel log by removing the 40-50 (per lun) buffer io error messages seen during a boot in my multipath setup. There is a good chance any real errors will be missed in the "noise" in the logs without this patch. 
During boot I see blocks of messages like " __ratelimit: 211 callbacks suppressed Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242847 Buffer I/O error on device sdm, logical block 1 Buffer I/O error on device sdm, logical block 5242878 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242872 " in my logs. My disk environment is multipath fiber channel using the SCSI_DH_RDAC code and multipathd. This topology includes an "active" and "ghost" path for each lun. IOs to the "ghost" path will never complete and the SCSI layer, via the scsi device handler rdac code, quickly returns the IOs to these paths and sets the REQ_QUIET scsi flag to suppress the scsi layer messages. I want to extend the QUIET behavior to include the buffer file system layer to deal with these errors as well. I have been running this patch for a while now on several boxes without issue. A few runs of bonnie++ show no noticeable difference in performance in my setup. Thanks to John Stultz for the quiet_error finalization. 
Submitted-by: Keith Mannthey Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + include/linux/buffer_head.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6a642098e5c3..cf132bfbbacf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -117,6 +117,7 @@ struct bio { #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ +#define BIO_QUIET 11 /* Make BIO Quiet */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3ce64b90118c..8605f8a74df9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -35,6 +35,7 @@ enum bh_state_bits { BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ + BH_Quiet, /* Buffer Error Prinks to be quiet */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities -- cgit v1.2.3 From 64d01dc9e1927e6535627d73f2336c75d1dd3fe2 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Wed, 3 Dec 2008 12:41:39 +0100 Subject: block: use cancel_work_sync() instead of kblockd_flush_work() After many improvements on kblockd_flush_work, it is now identical to cancel_work_sync, so a direct call to cancel_work_sync is suggested. The only difference is that cancel_work_sync is a GPL symbol, so no non-GPL modules anymore. 
Signed-off-by: Cheng Renquan Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 482e9600f7a2..e9bb73ff1d64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -978,7 +978,6 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From ba744d5e290055d171c68067259fcc1e2721f542 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 3 Dec 2008 12:41:40 +0100 Subject: block: reorder struct bio to remove padding on 64bit Remove 8 bytes of padding from struct bio which also removes 16 bytes from struct bio_pair to make it 248 bytes. bio_pair then fits into one fewer cache lines & into a smaller slab. Signed-off-by: Richard Kennedy Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index cf132bfbbacf..3ed714eb54d9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -90,10 +90,11 @@ struct bio { unsigned int bi_comp_cpu; /* completion CPU */ + atomic_t bi_cnt; /* pin count */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; - atomic_t bi_cnt; /* pin count */ void *bi_private; #if defined(CONFIG_BLK_DEV_INTEGRITY) -- cgit v1.2.3 From 313e42999dbc0f234ca5909a236f78f082cb43b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Nov 2008 13:32:02 +0900 Subject: block: reorganize QUEUE_ORDERED_* constants Separate out ordering type (drain,) and action masks (preflush, postflush, fua) from visible ordering mode selectors (QUEUE_ORDERED_*). 
Ordering types are now named QUEUE_ORDERED_BY_* while action masks are named QUEUE_ORDERED_DO_*. This change is necessary to add QUEUE_ORDERED_DO_BAR and make it optional to improve empty barrier implementation. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9bb73ff1d64..5c92b4432399 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -523,22 +523,29 @@ enum { * TAG_FLUSH : ordering by tag w/ pre and post flushes * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = 0x01, - QUEUE_ORDERED_TAG = 0x02, - - QUEUE_ORDERED_PREFLUSH = 0x10, - QUEUE_ORDERED_POSTFLUSH = 0x20, - QUEUE_ORDERED_FUA = 0x40, - - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + QUEUE_ORDERED_BY_DRAIN = 0x01, + QUEUE_ORDERED_BY_TAG = 0x02, + QUEUE_ORDERED_DO_PREFLUSH = 0x10, + QUEUE_ORDERED_DO_POSTFLUSH = 0x40, + QUEUE_ORDERED_DO_FUA = 0x80, + + QUEUE_ORDERED_NONE = 0x00, + + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN, + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_POSTFLUSH, + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_FUA, + + QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG, + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_POSTFLUSH, + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_DO_PREFLUSH | + 
QUEUE_ORDERED_DO_FUA, /* * Ordered operation sequence -- cgit v1.2.3 From f671620e7d895af221bdfeda751d54fa55ed9546 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Nov 2008 13:32:04 +0900 Subject: block: make every barrier action optional In all barrier sequences, the barrier write itself was always assumed to be issued and thus didn't have corresponding control flag. This patch adds QUEUE_ORDERED_DO_BAR and unify action mask handling in start_ordered() such that any barrier action can be skipped. This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c92b4432399..b044267009ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -526,12 +526,14 @@ enum { QUEUE_ORDERED_BY_DRAIN = 0x01, QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, + QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, QUEUE_ORDERED_DO_FUA = 0x80, QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | + QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -539,7 +541,8 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG, + QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | + QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, -- cgit v1.2.3 From 8f11b3e99a1136fcbb67316c3260f085299c0bff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Nov 2008 13:32:05 +0900 Subject: block: make barrier completion more robust Barrier completion had the following assumptions. * start_ordered() couldn't finish the whole sequence properly. 
If all actions are to be skipped, q->ordseq is set correctly but the actual completion was never triggered thus hanging the barrier request. * Drain completion in elv_complete_request() assumed that there's always at least one request in the queue when drain completes. Both assumptions are true but these assumptions need to be removed to improve empty barrier implementation. This patch makes the following changes. * Make start_ordered() use blk_ordered_complete_seq() to mark skipped steps complete and notify __elv_next_request() that it should fetch the next request if the whole barrier has completed inside start_ordered(). * Make drain completion path in elv_complete_request() check whether the queue is empty. Empty queue also indicates drain completion. * While at it, convert 0/1 return from blk_do_ordered() to false/true. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b044267009ed..3c7078e0129d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -866,10 +866,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); -extern int blk_do_ordered(struct request_queue *, struct request **); +extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); +extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct 
scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From 58eea927d2de43dc6f03d1ba2c46e55854b31540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Nov 2008 13:32:06 +0900 Subject: block: simplify empty barrier implementation Empty barrier required special handling in __elv_next_request() to complete it without letting the low level driver see it. With previous changes, barrier code is now flexible enough to skip the BAR step using the same barrier sequence selection mechanism. Drop the special handling and mask off q->ordered from start_ordered(). Remove blk_empty_barrier() test which now has no user. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3c7078e0129d..41bbadfd17f6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -596,7 +596,6 @@ enum { #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) -- cgit v1.2.3 From 7ff9345ffac56743b5001561bc2dc1e041b79149 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Dec 2008 11:53:43 +0100 Subject: bio: only mempool back the largest bio_vec slab cache We only very rarely need the mempool backing, so it makes sense to get rid of all but one of the mempool in a bio_set. So keep the largest bio_vec count mempool so we can always honor the largest allocation, and "upgrade" callers that fail. 
Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 3ed714eb54d9..d76e4bf22f29 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -397,13 +397,14 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) */ #define BIO_POOL_SIZE 2 #define BIOVEC_NR_POOLS 6 +#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) struct bio_set { mempool_t *bio_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; #endif - mempool_t *bvec_pools[BIOVEC_NR_POOLS]; + mempool_t *bvec_pool; }; struct biovec_slab { -- cgit v1.2.3 From 1b4344986926da324b5cd10b683e5a1a5e1b7db3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Oct 2008 20:32:58 +0200 Subject: bio: move the slab pointer inside the bio_set In preparation for adding differently sized bios. Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d76e4bf22f29..9340098d75dc 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -400,6 +400,7 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) #define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) struct bio_set { + struct kmem_cache *bio_slab; mempool_t *bio_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; -- cgit v1.2.3 From bb799ca0202a360fa74d5f17039b9100caebdde7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Dec 2008 15:35:05 +0100 Subject: bio: allow individual slabs in the bio_set Instead of having a global bio slab cache, add a reference to one in each bio_set that is created. This allows for personalized slabs in each bio_set, so that they can have bios of different sizes. This means we can personalize the bios we return. 
File systems may want to embed the bio inside another structure, to avoid allocating more items (and stuffing them in ->bi_private) after they get a bio. Or we may want to embed a number of bio_vecs directly at the end of a bio, to avoid doing two allocations to return a bio. This is now possible. Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9340098d75dc..4b80d3537f97 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -334,7 +334,7 @@ struct bio_pair { extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); -extern struct bio_set *bioset_create(int, int); +extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); @@ -379,6 +379,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); +extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); /* @@ -401,6 +402,8 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) struct bio_set { struct kmem_cache *bio_slab; + unsigned int front_pad; + mempool_t *bio_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; @@ -415,6 +418,7 @@ struct biovec_slab { }; extern struct bio_set *fs_bio_set; +extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; /* * a small number of entries is fine, not going to be performance critical. 
-- cgit v1.2.3 From 392ddc32982a5c661dd90dd49a3cb37f1c68b782 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Dec 2008 12:42:54 +0100 Subject: bio: add support for inlining a number of bio_vecs inside the bio When we go and allocate a bio for IO, we actually do two allocations. One for the bio itself, and one for the bi_io_vec that holds the actual pages we are interested in. This feature inlines a definable amount of io vecs inside the bio itself, so we eliminate the bio_vec array allocation for IO's up to a certain size. It defaults to 4 vecs, which is typically 16k of IO. Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4b80d3537f97..18462c5b8fff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -102,6 +102,13 @@ struct bio { #endif bio_destructor_t *bi_destructor; /* destructor */ + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. + */ + struct bio_vec bi_inline_vecs[0]; }; /* @@ -213,6 +220,11 @@ static inline void *bio_data(struct bio *bio) return NULL; } +static inline int bio_has_allocated_vec(struct bio *bio) +{ + return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; +} + /* * will die */ -- cgit v1.2.3 From abf137dd7712132ee56d5b3143c2ff61a72a5faa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Dec 2008 08:11:22 +0100 Subject: aio: make the lookup_ioctx() lockless The mm->ioctx_list is currently protected by a reader-writer lock, so we always grab that lock on the read side for doing ioctx lookups. As the workload is extremely reader biased, turn this into an rcu hlist so we can make lookup_ioctx() lockless. Get rid of the rwlock and use a spinlock for providing update side exclusion. 
There's usually only 1 entry on this list, so it doesn't make sense to look into fancier data structures. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/aio.h | 5 ++++- include/linux/mm_types.h | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index f6b8cf99b596..b16a957030f8 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -183,7 +184,7 @@ struct kioctx { /* This needs improving */ unsigned long user_id; - struct kioctx *next; + struct hlist_node list; wait_queue_head_t wait; @@ -199,6 +200,8 @@ struct kioctx { struct aio_ring_info ring_info; struct delayed_work wq; + + struct rcu_head rcu_head; }; /* prototypes */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fe825471d5aa..9cfc9b627fdd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -232,8 +232,9 @@ struct mm_struct { struct core_state *core_state; /* coredumping support */ /* aio bits */ - rwlock_t ioctx_list_lock; /* aio lock */ - struct kioctx *ioctx_list; + spinlock_t ioctx_lock; + struct hlist_head ioctx_list; + #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical -- cgit v1.2.3 From b374d18a4bfce705e4a99ae9f501b53e86ecb283 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 31 Oct 2008 10:05:07 +0100 Subject: block: get rid of elevator_t typedef Just use struct elevator_queue everywhere instead. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- include/linux/elevator.h | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41bbadfd17f6..7035cec583b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -26,7 +26,6 @@ struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -typedef struct elevator_queue elevator_t; struct request_pm_state; struct blk_trace; struct request; @@ -313,7 +312,7 @@ struct request_queue */ struct list_head queue_head; struct request *last_merge; - elevator_t *elevator; + struct elevator_queue *elevator; /* * the queue request freelist, one for reads and one for writes diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 92f6f634e3e6..7a204256b155 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); typedef void *(elevator_init_fn) (struct request_queue *); -typedef void (elevator_exit_fn) (elevator_t *); +typedef void (elevator_exit_fn) (struct elevator_queue *); struct elevator_ops { @@ -62,8 +62,8 @@ struct elevator_ops struct elv_fs_entry { struct attribute attr; - ssize_t (*show)(elevator_t *, char *); - ssize_t (*store)(elevator_t *, const char *, size_t); + ssize_t (*show)(struct elevator_queue *, char *); + ssize_t (*store)(struct elevator_queue *, const char *, size_t); }; /* @@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *); extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); -extern void elevator_exit(elevator_t *); +extern void elevator_exit(struct elevator_queue *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.3 
From a6f23657d3072bde6844055bbc2290e497f33fbc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Oct 2008 12:52:42 +0200 Subject: block: add one-hit cache for disk partition lookup disk_map_sector_rcu() returns a partition from a sector offset, which we use for IO statistics on a per-partition basis. The lookup itself is an O(N) list lookup, where N is the number of partitions. This actually hurts performance quite a bit, even on the lower end partitions. On higher numbered partitions, it can get pretty bad. Solve this by adding a one-hit cache for partition lookup. This makes the lookup O(1) for the case where we do most IO to one partition. Even for mixed partition workloads, amortized cost is pretty close to O(1) since the natural IO batching makes the one-hit cache last for lots of IOs. Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3df7742ce246..16948eaecae3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; + struct hd_struct *last_lookup; struct hd_struct *part[]; }; -- cgit v1.2.3 From b3a6ffe16b5cc48abe7db8d04882dc45280eb693 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Dec 2008 09:51:16 +0100 Subject: Get rid of CONFIG_LSF We have two separate config entries for large devices/files. One is CONFIG_LBD that guards just the devices, the other is CONFIG_LSF that handles large files. This doesn't make a lot of sense, you typically want both or none. So get rid of CONFIG_LSF and change CONFIG_LBD wording to indicate that it covers both. 
Acked-by: Jean Delvare Signed-off-by: Jens Axboe --- include/linux/types.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 1d98330b1f2c..121f349cb7ec 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -135,19 +135,14 @@ typedef __s64 int64_t; * * Linux always considers sectors to be 512 bytes long independently * of the devices real block size. + * + * blkcnt_t is the type of the inode's block count. */ #ifdef CONFIG_LBD typedef u64 sector_t; -#else -typedef unsigned long sector_t; -#endif - -/* - * The type of the inode's block count. - */ -#ifdef CONFIG_LSF typedef u64 blkcnt_t; #else +typedef unsigned long sector_t; typedef unsigned long blkcnt_t; #endif -- cgit v1.2.3 From f453ba0460742ad027ae0c4c7d61e62817b3e7ef Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Nov 2008 14:05:41 -0800 Subject: DRM: add mode setting support Add mode setting support to the DRM layer. This is a fairly big chunk of work that allows DRM drivers to provide full output control and configuration capabilities to userspace. It was motivated by several factors: - the fb layer's APIs aren't suited for anything but simple configurations - coordination between the fb layer, DRM layer, and various userspace drivers is poor to non-existent (radeonfb excepted) - user level mode setting drivers makes displaying panic & oops messages more difficult - suspend/resume of graphics state is possible in many more configurations with kernel level support This commit just adds the core DRM part of the mode setting APIs. Driver specific commits using these new structure and APIs will follow. 
Co-authors: Jesse Barnes , Jakob Bornecrantz Contributors: Alan Hourihane , Maarten Maathuis Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 248e6e3b9b73..a67a90cf8268 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,4 +153,8 @@ void vcs_remove_sysfs(struct tty_struct *tty); #define VESA_HSYNC_SUSPEND 2 #define VESA_POWERDOWN 3 +#ifdef CONFIG_VGA_CONSOLE +extern bool vgacon_text_force(void); +#endif + #endif /* _LINUX_CONSOLE_H */ -- cgit v1.2.3 From 773ff60e841461cb1f9374a713ffcda029b8c317 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 23 Dec 2008 19:37:01 +0900 Subject: SLUB: failslab support Currently fault-injection capability for SLAB allocator is only available to SLAB. This patch makes it available to SLUB, too. [penberg@cs.helsinki.fi: unify slab and slub implementations] Cc: Christoph Lameter Cc: Matt Mackall Signed-off-by: Akinobu Mita Signed-off-by: Pekka Enberg --- include/linux/fault-inject.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 32368c4f0326..06ca9b21dad2 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -81,4 +81,13 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) #endif /* CONFIG_FAULT_INJECTION */ +#ifdef CONFIG_FAILSLAB +extern bool should_failslab(size_t size, gfp_t gfpflags); +#else +static inline bool should_failslab(size_t size, gfp_t gfpflags) +{ + return false; +} +#endif /* CONFIG_FAILSLAB */ + #endif /* _LINUX_FAULT_INJECT_H */ -- cgit v1.2.3 From dfcd3610289132a762b7dc0eaf33998262cd9e20 Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Tue, 25 Nov 2008 15:08:19 +0100 Subject: slab: Fix comment on #endif This #endif in slab.h is described 
as closing the inner block while it's for the big CONFIG_NUMA one. That makes reading the code a bit harder. This trivial patch fixes the comment. Signed-off-by: Pascal Terjan Signed-off-by: Pekka Enberg --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 000da12b5cf0..9d8ca14be3c4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -285,7 +285,7 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); #define kmalloc_node_track_caller(size, flags, node) \ kmalloc_track_caller(size, flags) -#endif /* DEBUG_SLAB */ +#endif /* CONFIG_NUMA */ /* * Shortcuts -- cgit v1.2.3 From 43a256322ac1fc105c181b3cade3b9bfc0b63ca1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 28 Dec 2008 16:01:13 -0800 Subject: sparseirq: move __weak symbols into separate compilation unit GCC has a bug with __weak alias functions: if the functions are in the same compilation unit as their call site, GCC can decide to inline them - and thus rob the linker of the opportunity to override the weak alias with the real thing. So move all the IRQ handling related __weak symbols to kernel/irq/chip.c. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 6 ++++++ include/linux/irq.h | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 777f89e00b4a..d9a370325ae2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,4 +467,10 @@ static inline void init_irq_proc(void) int show_interrupts(struct seq_file *p, void *v); +struct irq_desc; + +extern int early_irq_init(void); +extern int arch_early_irq_init(void); +extern int arch_init_chip_data(struct irq_desc *desc, int cpu); + #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 0e40af4bac40..d64a6d49bdef 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -193,9 +193,6 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern int early_irq_init(void); -extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); extern void arch_init_copy_chip_data(struct irq_desc *old_desc, struct irq_desc *desc, int cpu); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); -- cgit v1.2.3 From d61c72e52b98411d1cfef1fdb3f5a8bb070f8966 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Dec 2008 14:07:21 +0100 Subject: DMI: add dmi_match Add a wrapper for testing system_info which will handle also NULL system infos. This will be used by the ata PIIX driver. 
Signed-off-by: Jiri Slaby Cc: Alexandru Romanescu Cc: Tejun Heo Cc: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/dmi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 2bfda178f274..34161907b2f8 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -47,6 +47,7 @@ extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); extern int dmi_available; extern int dmi_walk(void (*decode)(const struct dmi_header *)); +extern bool dmi_match(enum dmi_field f, const char *str); #else @@ -61,6 +62,8 @@ static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 static inline int dmi_walk(void (*decode)(const struct dmi_header *)) { return -1; } +static inline bool dmi_match(enum dmi_field f, const char *str) + { return false; } #endif -- cgit v1.2.3 From ea319518ba3de282c13ae1cf4bf2215c5e03e67e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Dec 2008 15:08:55 +0100 Subject: locking, percpu counters: introduce separate lock classes Impact: fix lockdep false positives Classify percpu_counter instances similar to regular lock objects -- that is, per instantiation site. The networking code has increased its use of percpu_counters, which leads to false positives if they are treated as a single class. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/percpu_counter.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..96bdde36599f 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -30,8 +30,16 @@ struct percpu_counter { #define FBC_BATCH (NR_CPUS*4) #endif -int percpu_counter_init(struct percpu_counter *fbc, s64 amount); -int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, + struct lock_class_key *key); + +#define percpu_counter_init(fbc, value) \ + ({ \ + static struct lock_class_key __key; \ + \ + __percpu_counter_init(fbc, value, &__key); \ + }) + void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); @@ -85,8 +93,6 @@ static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) return 0; } -#define percpu_counter_init_irq percpu_counter_init - static inline void percpu_counter_destroy(struct percpu_counter *fbc) { } -- cgit v1.2.3 From 34a4c5eb421dab6fe8381aa12c990f9d6f645b17 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 29 Dec 2008 04:00:23 -0800 Subject: Input: map_to_7segment.h - convert to __inline__ for userspace Use __inline__ rather than inline for map_to_seg7() since it is exported to userspace. 
Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- include/linux/map_to_7segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/map_to_7segment.h b/include/linux/map_to_7segment.h index 7df8432c4402..12d62a54d470 100644 --- a/include/linux/map_to_7segment.h +++ b/include/linux/map_to_7segment.h @@ -75,7 +75,7 @@ struct seg7_conversion_map { unsigned char table[128]; }; -static inline int map_to_seg7(struct seg7_conversion_map *map, int c) +static __inline__ int map_to_seg7(struct seg7_conversion_map *map, int c) { return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL; } -- cgit v1.2.3 From 2a2ca6a96194c4744a2adeefbc09ce881f3c5abe Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:31 +0100 Subject: ide: replace the global ide_lock spinlock by per-hwgroup spinlocks (v2) Now that (almost) all host drivers have been fixed not to abuse ide_lock and core code usage of ide_lock has been sanitized we may safely replace ide_lock by per-hwgroup locks. This patch is partially based on earlier patch from Ravikiran G Thirumalai. While at it: - don't use deprecated HWIF() and HWGROUP() macros - update locking documentation in ide.h v2: Add missing spin_lock_init(&hwgroup->lock). (Noticed by Elias Oltmanns) Cc: Vaibhav V. Nivargi Cc: Alok N. 
Kataria Cc: Shai Fultheim Signed-off-by: Ravikiran Thirumalai Cc: Elias Oltmanns Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 010fb26a1579..c871d325cedb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -909,6 +909,8 @@ typedef struct hwgroup_s { int req_gen; int req_gen_timer; + + spinlock_t lock; } ide_hwgroup_t; typedef struct ide_driver_s ide_driver_t; @@ -1610,13 +1612,13 @@ extern struct mutex ide_cfg_mtx; /* * Structure locking: * - * ide_cfg_mtx and ide_lock together protect changes to - * ide_hwif_t->{next,hwgroup} + * ide_cfg_mtx and hwgroup->lock together protect changes to + * ide_hwif_t->next * ide_drive_t->next * - * ide_hwgroup_t->busy: ide_lock - * ide_hwgroup_t->hwif: ide_lock - * ide_hwif_t->mate: constant, no locking + * ide_hwgroup_t->busy: hwgroup->lock + * ide_hwgroup_t->hwif: hwgroup->lock + * ide_hwif_t->{hwgroup,mate}: constant, no locking * ide_drive_t->hwif: constant, no locking */ -- cgit v1.2.3 From 27c01c2db05c3cf8824975e50403cd4fd9356dca Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:32 +0100 Subject: ide-cd: remove obsolete seek optimization It doesn't make much sense nowadays and is problematic on some drives. 
Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c871d325cedb..0b8af0535d85 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -122,8 +122,6 @@ struct ide_io_ports { #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ #define SECTOR_SIZE 512 -#define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) - /* * Timeouts for various operations: */ @@ -496,8 +494,6 @@ enum { * when more than one interrupt is needed. */ IDE_AFLAG_LIMIT_NFRAMES = (1 << 7), - /* Seeking in progress. */ - IDE_AFLAG_SEEKING = (1 << 8), /* Saved TOC information is current. */ IDE_AFLAG_TOC_VALID = (1 << 9), /* We think that the drive door is locked. */ -- cgit v1.2.3 From 6b5cde3629701258004b94cde75dd1089b556b02 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:32 +0100 Subject: cmd64x: set IDE_HFLAG_SERIALIZE explicitly for CMD646 * Set IDE_HFLAG_SERIALIZE explicitly for CMD646. * Remove no longer needed ide_cmd646 chipset type (which has a nice side-effect of fixing handling of unexpected IRQs). 
Cc: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 0b8af0535d85..150e42311ee0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -171,7 +171,7 @@ enum { ide_unknown, ide_generic, ide_pci, ide_cmd640, ide_dtc2278, ide_ali14xx, ide_qd65xx, ide_umc8672, ide_ht6560b, ide_rz1000, ide_trm290, - ide_cmd646, ide_cy82c693, ide_4drives, + ide_cy82c693, ide_4drives, ide_pmac, ide_acorn, ide_au1xxx, ide_palm3710 }; -- cgit v1.2.3 From f58c1ab8deebc2360cef998f169a6727c288210f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:33 +0100 Subject: ide: always set nIEN on idle devices * Set nIEN for previous port/device in ide_do_request() also if port uses a non-shared IRQ. * Remove no longer needed ide_hwif_t.sharing_irq. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 150e42311ee0..1d28006aec68 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -842,7 +842,6 @@ typedef struct hwif_s { unsigned present : 1; /* this interface exists */ unsigned serialized : 1; /* serialized all channel operation */ - unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ struct device gendev; -- cgit v1.2.3 From 7f1ac8c4b9dadc55ec656b368f5f470f2cbe3083 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:33 +0100 Subject: rz1000: apply chipset quirks early (v2) * Use pci_name(dev) instead of hwif->name in init_hwif_rz1000(). * init_hwif_rz1000() -> rz1000_init_chipset(). Update rz1000_init_one() to use rz1000_init_chipset() and add now required rz1000_remove(). * Remove superfluous ide_rz1000 chipset type. 
v2: * unsigned int rz1000_init_chipset() -> int rz1000_disable_readahead() per Sergei's suggestion. Cc: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1d28006aec68..fc1a966c7b7d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *); enum { ide_unknown, ide_generic, ide_pci, ide_cmd640, ide_dtc2278, ide_ali14xx, ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_rz1000, ide_trm290, - ide_cy82c693, ide_4drives, + ide_trm290, ide_cy82c693, ide_4drives, ide_pmac, ide_acorn, ide_au1xxx, ide_palm3710 }; -- cgit v1.2.3 From 6b4924962c49655494d2c8e9d3faab0e349a3062 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:34 +0100 Subject: ide: add ->max_sectors field to struct ide_port_info * Add ->max_sectors field to struct ide_port_info to allow host drivers to specify value used for hwif->rqsize (if smaller than the default). * Convert pdc202xx_old to use ->max_sectors and remove no longer needed IDE_HFLAG_RQSIZE_256 flag. There should be no functional changes caused by this patch. 
Acked-by: Sergei Shtyltov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fc1a966c7b7d..2574dda4a3e7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1372,8 +1372,6 @@ enum { IDE_HFLAG_LEGACY_IRQS = (1 << 21), /* force use of legacy IRQs */ IDE_HFLAG_FORCE_LEGACY_IRQS = (1 << 22), - /* limit LBA48 requests to 256 sectors */ - IDE_HFLAG_RQSIZE_256 = (1 << 23), /* use 32-bit I/O ops */ IDE_HFLAG_IO_32BIT = (1 << 24), /* unmask IRQs */ @@ -1411,6 +1409,9 @@ struct ide_port_info { ide_pci_enablebit_t enablebits[2]; hwif_chipset_t chipset; + + u16 max_sectors; /* if < than the default one */ + u32 host_flags; u8 pio_mask; u8 swdma_mask; -- cgit v1.2.3 From 1f66019bdf902cb59adf959e462bcd3f4c01f683 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:34 +0100 Subject: trm290: add IDE_HFLAG_TRM290 host flag * Add IDE_HFLAG_TRM290 host flag and use it in ide_build_dmatable(). * Remove no longer needed ide_trm290 chipset type. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 2574dda4a3e7..f62d35a5fb71 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -170,7 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *); enum { ide_unknown, ide_generic, ide_pci, ide_cmd640, ide_dtc2278, ide_ali14xx, ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_trm290, ide_cy82c693, ide_4drives, + ide_cy82c693, ide_4drives, ide_pmac, ide_acorn, ide_au1xxx, ide_palm3710 }; @@ -1372,6 +1372,8 @@ enum { IDE_HFLAG_LEGACY_IRQS = (1 << 21), /* force use of legacy IRQs */ IDE_HFLAG_FORCE_LEGACY_IRQS = (1 << 22), + /* host is TRM290 */ + IDE_HFLAG_TRM290 = (1 << 23), /* use 32-bit I/O ops */ IDE_HFLAG_IO_32BIT = (1 << 24), /* unmask IRQs */ -- cgit v1.2.3 From b7876a6fb6e9bf6cbcf7b40cf034aa4138d7978f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:34 +0100 Subject: cy82c693: remove superfluous ide_cy82c693 chipset type Since CY82C693 doesn't require serialization we may as well use the default ide_pci chipset type. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f62d35a5fb71..f89d6d69a386 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *); enum { ide_unknown, ide_generic, ide_pci, ide_cmd640, ide_dtc2278, ide_ali14xx, ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_cy82c693, ide_4drives, - ide_pmac, ide_acorn, + ide_4drives, ide_pmac, ide_acorn, ide_au1xxx, ide_palm3710 }; -- cgit v1.2.3 From 702c026be87ef8374ae58122969a4b0b081ce6f2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:36 +0100 Subject: ide: rework handling of serialized ports (v2) * hpt366: set IDE_HFLAG_SERIALIZE in ->host_flags if needed in init_hwif_hpt366(). Remove HPT_SERIALIZE_IO while at it. * Set IDE_HFLAG_SERIALIZE in ->host_flags if needed in ide_init_port(). * Convert init_irq() to use IDE_HFLAG_SERIALIZE together with hwif->host to find out ports which need to be serialized. * Remove no longer needed save_match() and ide_hwif_t.serialized. v2: * Set host's ->host_flags field instead of port's copy. This patch should fix the incorrect grouping of port(s) from host(s) that need serialization with port(s) that happen to use the same IRQ(s) but are from the host(s) that don't need it. 
Cc: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f89d6d69a386..9b89cab6d493 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -839,7 +839,6 @@ typedef struct hwif_s { unsigned extra_ports; /* number of extra dma ports */ unsigned present : 1; /* this interface exists */ - unsigned serialized : 1; /* serialized all channel operation */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ struct device gendev; -- cgit v1.2.3 From e2984c628c924442132304ae662da433f41c05c9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Dec 2008 20:27:37 +0100 Subject: ide: move Power Management support to ide-pm.c There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9b89cab6d493..e99c56de7f56 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1116,6 +1116,14 @@ enum { IDE_PM_COMPLETED, }; +int generic_ide_suspend(struct device *, pm_message_t); +int generic_ide_resume(struct device *); + +void ide_complete_power_step(ide_drive_t *, struct request *); +ide_startstop_t ide_start_power_step(ide_drive_t *, struct request *); +void ide_complete_pm_request(ide_drive_t *, struct request *); +void ide_check_pm_state(ide_drive_t *, struct request *); + /* * Subdrivers support. * -- cgit v1.2.3 From 69acdf1e5a9146ec6667f6c4b439acd38c18f5ea Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 4 Nov 2008 21:59:37 -0300 Subject: V4L/DVB (9530): Add new pixel format VYUY 16 bits wide. There were already 3 YUV formats defined : - YUYV - YVYU - UYVY The only left combination is VYUY, which is added in this patch. 
Signed-off-by: Robert Jarzmik Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 4669d7e72e75..ec311d4616cd 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -293,6 +293,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YVU420 v4l2_fourcc('Y', 'V', '1', '2') /* 12 YVU 4:2:0 */ #define V4L2_PIX_FMT_YUYV v4l2_fourcc('Y', 'U', 'Y', 'V') /* 16 YUV 4:2:2 */ #define V4L2_PIX_FMT_UYVY v4l2_fourcc('U', 'Y', 'V', 'Y') /* 16 YUV 4:2:2 */ +#define V4L2_PIX_FMT_VYUY v4l2_fourcc('V', 'Y', 'U', 'Y') /* 16 YUV 4:2:2 */ #define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4', '2', '2', 'P') /* 16 YVU422 planar */ #define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4', '1', '1', 'P') /* 16 YVU411 planar */ #define V4L2_PIX_FMT_Y41P v4l2_fourcc('Y', '4', '1', 'P') /* 12 YUV 4:1:1 */ -- cgit v1.2.3 From 278d1ed65e25d80af7c3a112d707b3f70516ddb4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:12 +1030 Subject: cpumask: make CONFIG_NR_CPUS always valid. Impact: cleanup Currently we have NR_CPUS, which is 1 on UP, and CONFIG_NR_CPUS on SMP. If we make CONFIG_NR_CPUS always valid (and always 1 on !SMP), we can skip the middleman. This also allows us to find and check all the unaudited NR_CPUS usage as we prepare for v. large NR_CPUS. To avoid breaking every arch, we cheat and do this for the moment in the header if the arch doesn't. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- include/linux/threads.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/threads.h b/include/linux/threads.h index 38d1a5d6568e..052b12bec8bd 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -8,17 +8,17 @@ */ /* - * Maximum supported processors that can run under SMP. This value is - * set via configure setting. 
The maximum is equal to the size of the - * bitmasks used on that platform, i.e. 32 or 64. Setting this smaller - * saves quite a bit of memory. + * Maximum supported processors. Setting this smaller saves quite a + * bit of memory. Use nr_cpu_ids instead of this except for static bitmaps. */ -#ifdef CONFIG_SMP -#define NR_CPUS CONFIG_NR_CPUS -#else -#define NR_CPUS 1 +#ifndef CONFIG_NR_CPUS +/* FIXME: This should be fixed in the arch's Kconfig */ +#define CONFIG_NR_CPUS 1 #endif +/* Places which use this should consider cpumask_var_t. */ +#define NR_CPUS CONFIG_NR_CPUS + #define MIN_THREADS_LEFT_FOR_ROOT 4 /* -- cgit v1.2.3 From 4b0bc0bca83f3fb7cf920e2ec80684c15d2269c0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:13 +1030 Subject: bitmap: test for constant as well as small size for inline versions Impact: reduce text size bitmap_zero et al have a fastpath for nbits <= BITS_PER_LONG, but this should really only apply where the nbits is known at compile time. This only saves about 1200 bytes on an allyesconfig kernel, but with cpumasks going variable that number will increase. 
text data bss dec hex filename 35327852 5035607 6782976 47146435 2cf65c3 vmlinux-before 35326640 5035607 6782976 47145223 2cf6107 vmlinux-after Signed-off-by: Rusty Russell --- include/linux/bitmap.h | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index a08c33a26ca9..2878811c6134 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -137,9 +137,12 @@ extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ ) +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + static inline void bitmap_zero(unsigned long *dst, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = 0UL; else { int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); @@ -150,7 +153,7 @@ static inline void bitmap_zero(unsigned long *dst, int nbits) static inline void bitmap_fill(unsigned long *dst, int nbits) { size_t nlongs = BITS_TO_LONGS(nbits); - if (nlongs > 1) { + if (!small_const_nbits(nbits)) { int len = (nlongs - 1) * sizeof(unsigned long); memset(dst, 0xff, len); } @@ -160,7 +163,7 @@ static inline void bitmap_fill(unsigned long *dst, int nbits) static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src; else { int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); @@ -171,7 +174,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 & *src2; else __bitmap_and(dst, src1, src2, nbits); @@ -180,7 +183,7 @@ static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, static inline 
void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); @@ -189,7 +192,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); @@ -198,7 +201,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src1 & ~(*src2); else __bitmap_andnot(dst, src1, src2, nbits); @@ -207,7 +210,7 @@ static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_complement(dst, src, nbits); @@ -216,7 +219,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! 
((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_equal(src1, src2, nbits); @@ -225,7 +228,7 @@ static inline int bitmap_equal(const unsigned long *src1, static inline int bitmap_intersects(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); @@ -234,7 +237,7 @@ static inline int bitmap_intersects(const unsigned long *src1, static inline int bitmap_subset(const unsigned long *src1, const unsigned long *src2, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); @@ -242,7 +245,7 @@ static inline int bitmap_subset(const unsigned long *src1, static inline int bitmap_empty(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_empty(src, nbits); @@ -250,7 +253,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits) static inline int bitmap_full(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return ! 
(~(*src) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_full(src, nbits); @@ -258,7 +261,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits) static inline int bitmap_weight(const unsigned long *src, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } @@ -266,7 +269,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, int n, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = *src >> n; else __bitmap_shift_right(dst, src, n, nbits); @@ -275,7 +278,7 @@ static inline void bitmap_shift_right(unsigned long *dst, static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, int n, int nbits) { - if (nbits <= BITS_PER_LONG) + if (small_const_nbits(nbits)) *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, n, nbits); -- cgit v1.2.3 From cb78a0ce69fad2026825f957e24e2d9cda1ec9f1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:14 +1030 Subject: bitmap: fix seq_bitmap and seq_cpumask to take const pointer Impact: cleanup seq_bitmap just calls bitmap_scnprintf on the bits: that arg can be const. Similarly, seq_cpumask just calls seq_bitmap. 
Signed-off-by: Rusty Russell --- include/linux/seq_file.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b3dfa72f13b9..952e0187ba16 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -50,8 +50,9 @@ int seq_path(struct seq_file *, struct path *, char *); int seq_dentry(struct seq_file *, struct dentry *, char *); int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *esc); -int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits); -static inline int seq_cpumask(struct seq_file *m, cpumask_t *mask) +int seq_bitmap(struct seq_file *m, const unsigned long *bits, + unsigned int nr_bits); +static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) { return seq_bitmap(m, mask->bits, NR_CPUS); } -- cgit v1.2.3 From b3199c025d1646e25e7d1d640dd605db251dccf8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:14 +1030 Subject: cpumask: switch over to cpu_online/possible/active/present_mask: core Impact: cleanup This implements the obsolescent cpu_online_map in terms of cpu_online_mask, rather than the other way around. Same for the other maps. The documentation comments are also updated to refer to _mask rather than _map. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- include/linux/cpumask.h | 75 +++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b5ad19a6f43f..db2341beca45 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -416,65 +416,54 @@ int __next_cpu_nr(int n, const cpumask_t *srcp); /* * The following particular system cpumasks and operations manage - * possible, present, active and online cpus. Each of them is a fixed size - * bitmap of size NR_CPUS. 
+ * possible, present, active and online cpus. * - * #ifdef CONFIG_HOTPLUG_CPU - * cpu_possible_map - has bit 'cpu' set iff cpu is populatable - * cpu_present_map - has bit 'cpu' set iff cpu is populated - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * cpu_active_map - has bit 'cpu' set iff cpu available to migration - * #else - * cpu_possible_map - has bit 'cpu' set iff cpu is populated - * cpu_present_map - copy of cpu_possible_map - * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler - * #endif + * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable + * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler + * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * - * In either case, NR_CPUS is fixed at compile time, as the static - * size of these bitmaps. The cpu_possible_map is fixed at boot - * time, as the set of CPU id's that it is possible might ever - * be plugged in at anytime during the life of that system boot. - * The cpu_present_map is dynamic(*), representing which CPUs - * are currently plugged in. And cpu_online_map is the dynamic - * subset of cpu_present_map, indicating those CPUs available - * for scheduling. + * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * that it is possible might ever be plugged in at anytime during the + * life of that system boot. The cpu_present_mask is dynamic(*), + * representing which CPUs are currently plugged in. And + * cpu_online_mask is the dynamic subset of cpu_present_mask, + * indicating those CPUs available for scheduling. + * + * If HOTPLUG is enabled, then cpu_possible_mask is forced to have * all NR_CPUS bits set, otherwise it is just the set of CPUs that * ACPI reports present at boot. 
* - * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise - * cpu_present_map is just a copy of cpu_possible_map. + * cpu_present_mask is just a copy of cpu_possible_mask. * - * (*) Well, cpu_present_map is dynamic in the hotplug case. If not - * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP - * cpu_{online,possible,present}_maps are placebos. Changing them + * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. - * 2) Most SMP arch's #define some of these maps to be some - * other map specific to that arch. Therefore, the following - * must be #define macros, not inlines. To see why, examine - * the assembly code produced by the following. 
Note that - * set1() writes phys_x_map, but set2() writes x_map: - * int x_map, phys_x_map; - * #define set1(a) x_map = a - * inline void set2(int a) { x_map = a; } - * #define x_map phys_x_map - * main(){ set1(3); set2(5); } */ -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_present_map; -extern cpumask_t cpu_active_map; +extern const struct cpumask *const cpu_possible_mask; +extern const struct cpumask *const cpu_online_mask; +extern const struct cpumask *const cpu_present_mask; +extern const struct cpumask *const cpu_active_mask; + +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) #if NR_CPUS > 1 #define num_online_cpus() cpus_weight_nr(cpu_online_map) @@ -1058,12 +1047,6 @@ static inline void free_bootmem_cpumask_var(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ -/* The pointer versions of the maps, these will become the primary versions. */ -#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) -#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) -#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) -#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) - /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); -- cgit v1.2.3 From ae7a47e72e1a0b5e2b46d1596bc2c22942a73023 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:15 +1030 Subject: cpumask: make cpumask.h eat its own dogfood. Changes: 1) cpumask_t to struct cpumask, 2) cpus_weight_nr to cpumask_weight, 3) cpu_isset to cpumask_test_cpu, 4) ->bits to cpumask_bits() 5) cpu_*_map to cpu_*_mask. 
6) for_each_cpu_mask_nr to for_each_cpu Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 75 +++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index db2341beca45..e62a67156c53 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -268,6 +268,25 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + /* * Special-case data structure for "single bit set only" constant CPU masks. 
* @@ -278,11 +297,11 @@ static inline void __cpus_shift_left(cpumask_t *dstp, extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; -static inline const cpumask_t *get_cpu_mask(unsigned int cpu) +static inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; - return (const cpumask_t *)p; + return to_cpumask(p); } /* @@ -466,13 +485,13 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_active_map (*(cpumask_t *)cpu_active_mask) #if NR_CPUS > 1 -#define num_online_cpus() cpus_weight_nr(cpu_online_map) -#define num_possible_cpus() cpus_weight_nr(cpu_possible_map) -#define num_present_cpus() cpus_weight_nr(cpu_present_map) -#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) -#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) -#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) -#define cpu_active(cpu) cpu_isset((cpu), cpu_active_map) +#define num_online_cpus() cpumask_weight(cpu_online_mask) +#define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_present_cpus() cpumask_weight(cpu_present_mask) +#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) +#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) +#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) +#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) #else #define num_online_cpus() 1 #define num_possible_cpus() 1 @@ -485,10 +504,6 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) -#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) -#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) - /* These are the new versions of the cpumask operators: passed by pointer. 
* The older versions will be implemented in terms of these, then deleted. */ #define cpumask_bits(maskp) ((maskp)->bits) @@ -676,7 +691,7 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) * No static inline type checking - see Subtlety (1) above. */ #define cpumask_test_cpu(cpu, cpumask) \ - test_bit(cpumask_check(cpu), (cpumask)->bits) + test_bit(cpumask_check(cpu), cpumask_bits((cpumask))) /** * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask @@ -919,7 +934,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_scnprintf(char *buf, int len, const struct cpumask *srcp) { - return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); + return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits); } /** @@ -933,7 +948,7 @@ static inline int cpumask_scnprintf(char *buf, int len, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -948,7 +963,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len, static inline int cpulist_scnprintf(char *buf, int len, const struct cpumask *srcp) { - return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); + return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), + nr_cpumask_bits); } /** @@ -961,26 +977,7 @@ static inline int cpulist_scnprintf(char *buf, int len, */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); -} - -/** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * - * @bitmap: the bitmap - * - * There are a few places where cpumask_var_t isn't appropriate and - * static cpumasks must be used (eg. very early boot), yet we don't - * expose the definition of 'struct cpumask'. 
- * - * This does the conversion, and can be used as a constant initializer. - */ -#define to_cpumask(bitmap) \ - ((struct cpumask *)(1 ? (bitmap) \ - : (void *)sizeof(__check_is_bitmap(bitmap)))) - -static inline int __check_is_bitmap(const unsigned long *bitmap) -{ - return 1; + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -1055,6 +1052,10 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) +#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) +#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) + /* Wrappers for arch boot code to manipulate normally-constant masks */ static inline void set_cpu_possible(unsigned int cpu, bool possible) { -- cgit v1.2.3 From 3fa41520696fec2815e2d88fbcccdda77ba4d693 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:16 +1030 Subject: cpumask: make set_cpu_*/init_cpu_* out-of-line They're only for use in boot/cpu hotplug code anyway, and this avoids the use of deprecated cpu_*_map. Stephen Rothwell points out that gcc 4.2.4 (on powerpc at least) didn't like the cast away of const anyway: include/linux/cpumask.h: In function 'set_cpu_possible': include/linux/cpumask.h:1052: warning: passing argument 2 of 'cpumask_set_cpu' discards qualifiers from pointer target type So this kills two birds with one stone. 
Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 53 +++++++------------------------------------------ 1 file changed, 7 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e62a67156c53..7c178a6baae3 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1057,50 +1057,11 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ -static inline void set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &cpu_possible_map); - else - cpumask_clear_cpu(cpu, &cpu_possible_map); -} - -static inline void set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &cpu_present_map); - else - cpumask_clear_cpu(cpu, &cpu_present_map); -} - -static inline void set_cpu_online(unsigned int cpu, bool online) -{ - if (online) - cpumask_set_cpu(cpu, &cpu_online_map); - else - cpumask_clear_cpu(cpu, &cpu_online_map); -} - -static inline void set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &cpu_active_map); - else - cpumask_clear_cpu(cpu, &cpu_active_map); -} - -static inline void init_cpu_present(const struct cpumask *src) -{ - cpumask_copy(&cpu_present_map, src); -} - -static inline void init_cpu_possible(const struct cpumask *src) -{ - cpumask_copy(&cpu_possible_map, src); -} - -static inline void init_cpu_online(const struct cpumask *src) -{ - cpumask_copy(&cpu_online_map, src); -} +void set_cpu_possible(unsigned int cpu, bool possible); +void set_cpu_present(unsigned int cpu, bool present); +void set_cpu_online(unsigned int cpu, bool online); +void set_cpu_active(unsigned int cpu, bool active); +void init_cpu_present(const struct cpumask *src); +void init_cpu_possible(const struct cpumask *src); +void init_cpu_online(const struct cpumask 
*src); #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 54b11e6d57a10aa9d0009efd93873e17bffd5d30 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:16 +1030 Subject: cpumask: smp_call_function_many() Impact: Implementation change to remove cpumask_t from stack. Actually change smp_call_function_mask() to smp_call_function_many(). We avoid cpumasks on the stack in this version. (S390 has its own version, but that's going away apparently). We have to do some dancing to figure out if 0 or 1 other cpus are in the mask supplied and the online mask without allocating a tmp cpumask. It's still fairly cheap. We allocate the cpumask at the end of the call_function_data structure: if allocation fails we fallback to smp_call_function_single rather than using the baroque quiescing code (which needs a cpumask on stack). (Thanks to Hiroshi Shimamoto for spotting several bugs in previous versions!) Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: Hiroshi Shimamoto Cc: npiggin@suse.de Cc: axboe@kernel.dk --- include/linux/smp.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 2f85f3b04bc4..b82466968101 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -67,15 +67,16 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); -/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */ -int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, - int wait); +void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, bool wait); -static inline void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *info), void *info, - int wait) +/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. 
*/ +static inline int +smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, + int wait) { - smp_call_function_mask(*mask, func, info, wait); + smp_call_function_many(&mask, func, info, wait); + return 0; } int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, -- cgit v1.2.3 From e12f0102ac81d660c9f801d0a0e10ccf4537a9de Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:05:19 +1030 Subject: cpumask: Use nr_cpu_ids in seq_cpumask Impact: cleanup, futureproof nr_cpu_ids is the (badly named) runtime limit on possible CPU numbers; ie. the variable version of NR_CPUS. With the new cpumask operators, only bits less than this are defined. So we should use it everywhere, rather than NR_CPUS. Eventually this will make it possible to allocate cpumasks of the minimal length at runtime. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar --- include/linux/seq_file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 952e0187ba16..40ea5058c2ec 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -54,7 +54,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits, unsigned int nr_bits); static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) { - return seq_bitmap(m, mask->bits, NR_CPUS); + return seq_bitmap(m, mask->bits, nr_cpu_ids); } static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) -- cgit v1.2.3 From 480daab42c4dd74b3c07031ddf9031251c530c77 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:56 -0600 Subject: virtio: Don't use PAGE_SIZE in virtio_pci.c The virtio PCI devices don't depend on the guest page size. This matters now PowerPC virtio is gaining ground (they like 64k pages). 
Signed-off-by: Rusty Russell --- include/linux/virtio_pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index cdef35742932..e13d7ebcf576 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -53,4 +53,8 @@ /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 #endif -- cgit v1.2.3 From 5f0d1d7f2286c8a02dab69f5f0bd51681fab161e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:57 -0600 Subject: virtio: rename 'pagesize' arg to vring_init/vring_size It's really the alignment desired for consumer/producer separation; historically this was x86 pagesize, but with PowerPC it'll still be x86 pagesize. And in theory lguest could choose a different value. Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4a598fb3826..01bf3124e312 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -83,7 +83,7 @@ struct vring { * __u16 avail_idx; * __u16 available[num]; * - * // Padding to the next page boundary. + * // Padding to the next align boundary. * char pad[]; * * // A ring of used descriptor heads with free-running index. 
@@ -93,19 +93,19 @@ struct vring { * }; */ static inline void vring_init(struct vring *vr, unsigned int num, void *p, - unsigned long pagesize) + unsigned long align) { vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1) - & ~(pagesize - 1)); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) + & ~(align - 1)); } -static inline unsigned vring_size(unsigned int num, unsigned long pagesize) +static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) - + pagesize - 1) & ~(pagesize - 1)) + + align - 1) & ~(align - 1)) + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; } -- cgit v1.2.3 From 498af14783935af487d17dbee4ac451783cbc2b7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:57 -0600 Subject: virtio: Don't use PAGE_SIZE for vring alignment in virtio_pci. That doesn't work for non-4k guests which are now appearing. Signed-off-by: Rusty Russell --- include/linux/virtio_pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index e13d7ebcf576..cd0fd5d181a6 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -57,4 +57,8 @@ /* How many bits to shift physical queue address written to QUEUE_PFN. * 12 is historical, and due to x86 page size. */ #define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. 
*/ +#define VIRTIO_PCI_VRING_ALIGN 4096 #endif -- cgit v1.2.3 From 2966af73e70dee461c256b5eb877b2ff757f8c82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:58 -0600 Subject: virtio: use LGUEST_VRING_ALIGN instead of relying on pagesize This doesn't really matter, since lguest is i386 only at the moment, but we could actually choose a different value. (lguest doesn't have a guaranteed ABI). Signed-off-by: Rusty Russell --- include/linux/lguest_launcher.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index e7217dc58f39..bd0eba760522 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -59,4 +59,8 @@ enum lguest_req LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ }; + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. */ +#define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ -- cgit v1.2.3 From 87c7d57c17ade5024d95b6ca0da249da49b0672a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:26:03 -0600 Subject: virtio: hand virtio ring alignment as argument to vring_new_virtqueue This allows each virtio user to hand in the alignment appropriate to their virtio_ring structures. 
Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger --- include/linux/virtio_ring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 01bf3124e312..71e03722fb59 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -115,6 +115,7 @@ struct virtio_device; struct virtqueue; struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *vq), -- cgit v1.2.3 From 1b4aa2faeca1b9922033daf2475b6fc13b0ffea6 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Thu, 13 Nov 2008 15:48:33 -0600 Subject: virtio: avoid implicit use of Linux page size in balloon interface Make the balloon interface always use 4K pages, and convert Linux pfns if necessary. This patch assumes that Linux's PAGE_SHIFT will never be less than 12. Signed-off-by: Hollis Blanchard Signed-off-by: Rusty Russell (modified) --- include/linux/virtio_balloon.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index c30c7bfbf39b..8726ff77763e 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -10,6 +10,9 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +/* Size of a PFN in the balloon interface. */ +#define VIRTIO_BALLOON_PFN_SHIFT 12 + struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ -- cgit v1.2.3 From c29834584ea4eafccf2f62a0b8a32e64f792044c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Nov 2008 13:36:26 +0100 Subject: virtio_console: support console resizing this patch uses the new hvc callback hvc_resize to set the window size which allows to change the tty size of hvc_console via a hvc_resize function. 
I have added a new feature bit VIRTIO_CONSOLE_F_SIZE. The driver will change the window size on tty open and via the config_changed callback of the transport. Currently lguest and kvm_s390 have not implemented this callback, but the callback can be implemented at a later point in time. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- include/linux/virtio_console.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index 19a0da0dba41..7615ffcdd555 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -7,6 +7,17 @@ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 +/* Feature bits */ +#define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ + +struct virtio_console_config { + /* colums of the screens */ + __u16 cols; + /* rows of the screens */ + __u16 rows; +} __attribute__((packed)); + + #ifdef __KERNEL__ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001 From: Matias Zabaljauregui Date: Mon, 29 Sep 2008 01:40:07 -0300 Subject: lguest: move the initial guest page table creation code to the host This patch moves the initial guest page table creation code to the host, so the launcher keeps working with PAE enabled configs. Signed-off-by: Matias Zabaljauregui Signed-off-by: Rusty Russell --- include/linux/lguest_launcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index bd0eba760522..a53407a4165c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -54,7 +54,7 @@ struct lguest_vqconfig { /* Write command first word is a request. 
*/ enum lguest_req { - LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */ + LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ -- cgit v1.2.3 From 47fea2adfc9e16846bc57c2f64ff233b354fef39 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 23:39:17 +0530 Subject: sched: sched.c declare variables before they get used Impact: cleanup, avoid sparse warnings In linux/sched.h moved out sysctl_sched_latency, sysctl_sched_min_granularity, sysctl_sched_wakeup_granularity, sysctl_sched_shares_ratelimit and sysctl_sched_shares_thresh from #ifdef CONFIG_SCHED_DEBUG as these variables are common for both. Fixes these sparse warnings: kernel/sched.c:825:14: warning: symbol 'sysctl_sched_shares_ratelimit' was not declared. Should it be static? kernel/sched.c:832:14: warning: symbol 'sysctl_sched_shares_thresh' was not declared. Should it be static? kernel/sched_fair.c:37:14: warning: symbol 'sysctl_sched_latency' was not declared. Should it be static? kernel/sched_fair.c:43:14: warning: symbol 'sysctl_sched_min_granularity' was not declared. Should it be static? kernel/sched_fair.c:72:14: warning: symbol 'sysctl_sched_wakeup_granularity' was not declared. Should it be static? 
Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8395e715809d..01d9fd268eb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1651,16 +1651,16 @@ extern void wake_up_idle_cpu(int cpu); static inline void wake_up_idle_cpu(int cpu) { } #endif -#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_shares_ratelimit; +extern unsigned int sysctl_sched_shares_thresh; +#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_shares_ratelimit; -extern unsigned int sysctl_sched_shares_thresh; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.3 From 0877258d98154565def498833ccb208234c85084 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2008 16:21:16 -0300 Subject: V4L/DVB (9897): v4l2: Add camera zoom controls The zoom controls move the zoom lens group to an absolute position, as a relative displacement or at a given speed until reaching physical device limits. Positive values move the zoom lens group towards the telephoto direction, negative values towards the wide-angle direction. 
Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index ec311d4616cd..e450a92a622b 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1118,6 +1118,10 @@ enum v4l2_exposure_auto_type { #define V4L2_CID_FOCUS_RELATIVE (V4L2_CID_CAMERA_CLASS_BASE+11) #define V4L2_CID_FOCUS_AUTO (V4L2_CID_CAMERA_CLASS_BASE+12) +#define V4L2_CID_ZOOM_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+13) +#define V4L2_CID_ZOOM_RELATIVE (V4L2_CID_CAMERA_CLASS_BASE+14) +#define V4L2_CID_ZOOM_CONTINUOUS (V4L2_CID_CAMERA_CLASS_BASE+15) + /* * T U N I N G */ -- cgit v1.2.3 From 046425f8c4ac431db00c09a6d9fba16560b8e5b9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2008 16:22:05 -0300 Subject: V4L/DVB (9898): v4l2: Add privacy control The privacy control prevents video from being acquired by the camera. A true value indicates that no image can be captured. Devices that implement the privacy control must support read access and may support write access. 
Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index e450a92a622b..6e6743bd0f92 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1122,6 +1122,8 @@ enum v4l2_exposure_auto_type { #define V4L2_CID_ZOOM_RELATIVE (V4L2_CID_CAMERA_CLASS_BASE+14) #define V4L2_CID_ZOOM_CONTINUOUS (V4L2_CID_CAMERA_CLASS_BASE+15) +#define V4L2_CID_PRIVACY (V4L2_CID_CAMERA_CLASS_BASE+16) + /* * T U N I N G */ -- cgit v1.2.3 From 92f45badbbaccdbc1be25085292a1e258948e221 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 21 Dec 2008 10:35:25 -0300 Subject: V4L/DVB (9932): v4l2-compat32: fix 32-64 compatibility module Added all missing v4l1/2 ioctls and fix several broken conversions. Partially based on work done by Cody Pisto . Tested-by: Brandon Jenkins Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 6e6743bd0f92..e2cfd6add78c 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1465,6 +1465,8 @@ struct v4l2_chip_ident { #define VIDIOC_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_chip_ident) #endif #define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) +/* Reminder: when adding new ioctls please add support for them to + drivers/media/video/v4l2-compat-ioctl32.c as well! */ #ifdef __OLD_VIDIOC_ /* for compatibility, will go away some day */ -- cgit v1.2.3 From 4b00eb25340c1a9b9eedaf0bc5b0f0d18eddb028 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Dec 2008 11:17:56 -0300 Subject: V4L/DVB (9944): videodev2.h: fix typo. The comment said CX2584X instead of CX2341X. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index e2cfd6add78c..754c8d9685a4 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1051,7 +1051,7 @@ enum v4l2_mpeg_video_bitrate_mode { #define V4L2_CID_MPEG_VIDEO_MUTE (V4L2_CID_MPEG_BASE+210) #define V4L2_CID_MPEG_VIDEO_MUTE_YUV (V4L2_CID_MPEG_BASE+211) -/* MPEG-class control IDs specific to the CX2584x driver as defined by V4L2 */ +/* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_MPEG_CX2341X_BASE (V4L2_CTRL_CLASS_MPEG | 0x1000) #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE (V4L2_CID_MPEG_CX2341X_BASE+0) enum v4l2_mpeg_cx2341x_video_spatial_filter_mode { -- cgit v1.2.3 From 531c98e71805b32e9ea35a218119100bbd2b7615 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 22 Dec 2008 13:18:27 -0300 Subject: V4L/DVB (9953): em28xx: Add suport for debugging AC97 anciliary chips The em28xx driver can be coupled to an anciliary AC97 chip. This patch allows read/write AC97 registers directly. 
Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 754c8d9685a4..e31144d22237 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1376,6 +1376,7 @@ struct v4l2_streamparm { #define V4L2_CHIP_MATCH_HOST 0 /* Match against chip ID on host (0 for the host) */ #define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver ID */ #define V4L2_CHIP_MATCH_I2C_ADDR 2 /* Match against I2C 7-bit address */ +#define V4L2_CHIP_MATCH_AC97 3 /* Match against anciliary AC97 chip */ struct v4l2_register { __u32 match_type; /* Match type */ -- cgit v1.2.3 From 91962fa713bd8bf47434b02ac661fdc201365fa5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 18 Dec 2008 11:45:00 -0300 Subject: V4L/DVB (10078): video: add NV16 and NV61 pixel formats This patch adds support for NV16 and NV61 pixel formats. These pixel formats use two planes; one for 8-bit Y values and one for interleaved 8-bit U and V values. NV16/NV61 formats are very similar to NV12/NV21 with the exception that NV16/NV61 are using the same number of lines for both planes. The difference between NV16 and NV61 is the U and V byte order. The fourcc values are extrapolated from the NV12/NV21 case. 
Signed-off-by: Magnus Damm Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index e31144d22237..1f126e30766c 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -305,6 +305,8 @@ struct v4l2_pix_format { /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21 v4l2_fourcc('N', 'V', '2', '1') /* 12 Y/CrCb 4:2:0 */ +#define V4L2_PIX_FMT_NV16 v4l2_fourcc('N', 'V', '1', '6') /* 16 Y/CbCr 4:2:2 */ +#define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ /* The following formats are not defined in the V4L2 specification */ #define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y', 'U', 'V', '9') /* 9 YUV 4:1:0 */ -- cgit v1.2.3 From f748bafa3ca1fb056e63afdeecacc1c68d8104df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 8 Dec 2008 21:30:31 -0700 Subject: ACPI: PCI: move struct acpi_prt_entry declaration out of public header file The struct acpi_prt_entry is used only in pci_irq.c, so there's no need for the declaration to be public. This patch moves it into pci_irq.c. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/linux/acpi.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fba8051fb297..813f937b3ab4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -131,22 +131,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity); */ void acpi_unregister_gsi (u32 gsi); -struct acpi_prt_entry { - struct list_head node; - struct acpi_pci_id id; - u8 pin; - struct { - acpi_handle handle; - u32 index; - } link; - u32 irq; -}; - -struct acpi_prt_list { - int count; - struct list_head entries; -}; - struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); -- cgit v1.2.3 From ea7e96e0f2277107d9ea14c3f16c86ba82b2e560 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 16 Dec 2008 16:28:17 +0800 Subject: ACPI: remove private acpica headers from driver files External driver files should not include any private acpica headers. 
Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/linux/pci_hotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..c2d1a7d1886a 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -223,7 +223,6 @@ struct hotplug_params { #ifdef CONFIG_ACPI #include #include -#include extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); -- cgit v1.2.3 From 1c5745aa380efb6417b5681104b007c8612fb496 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Dec 2008 23:05:28 +0100 Subject: sched_clock: prevent scd->clock from moving backwards, take #2 Redo: 5b7dba4: sched_clock: prevent scd->clock from moving backwards which had to be reverted due to s2ram hangs: ca7e716: Revert "sched_clock: prevent scd->clock from moving backwards" ... this time with resume restoring GTOD later in the sequence taken into account as well. The "timekeeping_suspended" flag is not very nice but we cannot call into GTOD before it has been properly resumed and the scheduler will run very early in the resume sequence. 
Cc: Signed-off-by: Ingo Molnar --- include/linux/time.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index ce321ac5c8f8..fbbd2a1c92ba 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -105,6 +105,7 @@ extern unsigned long read_persistent_clock(void); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); +extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); -- cgit v1.2.3 From 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:37 +0100 Subject: [PATCH] fix scaled & unscaled cputime accounting The utimescaled / stimescaled fields in the task structure and the global cpustat should be set on all architectures. On s390 the calls to account_user_time_scaled and account_system_time_scaled never have been added. In addition system time that is accounted as guest time to the user time of a process is accounted to the scaled system time instead of the scaled user time. To fix the bugs and to prevent future forgetfulness this patch merges account_system_time_scaled into account_system_time and account_user_time_scaled into account_user_time. 
Cc: Benjamin Herrenschmidt Cc: Hidetoshi Seto Cc: Tony Luck Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: Michael Neuling Acked-by: Paul Mackerras Signed-off-by: Martin Schwidefsky --- include/linux/kernel_stat.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4ee4b3d2316f..c78a459662a6 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,10 +79,8 @@ static inline unsigned int kstat_irqs(unsigned int irq) } extern unsigned long long task_delta_exec(struct task_struct *); -extern void account_user_time(struct task_struct *, cputime_t); -extern void account_user_time_scaled(struct task_struct *, cputime_t); -extern void account_system_time(struct task_struct *, int, cputime_t); -extern void account_system_time_scaled(struct task_struct *, cputime_t); +extern void account_user_time(struct task_struct *, cputime_t, cputime_t); +extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(struct task_struct *, cputime_t); #endif /* _LINUX_KERNEL_STAT_H */ -- cgit v1.2.3 From 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:38 +0100 Subject: [PATCH] idle cputime accounting The cpu time spent by the idle process actually doing something is currently accounted as idle time. This is plain wrong, the architectures that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the time spent doing nothing and the time spent by idle doing work. The first is accounted with account_idle_time and the second with account_system_time. The architectures that use the account_xxx_time interface directly and not the account_xxx_ticks interface now need to do the check for the idle process in their arch code. 
In particular to improve the system vs true idle time accounting the arch code needs to measure the true idle time instead of just testing for the idle process. To improve the tick based accounting as well we would need an architecture primitive that can tell us if the pt_regs of the interrupted context points to the magic instruction that halts the cpu. In addition idle time is no more added to the stime of the idle process. This field now contains the system time of the idle process as it should be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as every tick that occurs while idle is running will be accounted as idle time. This patch contains the necessary common code changes to be able to distinguish idle system time and true idle time. The architectures with support for VIRT_CPU_ACCOUNTING need some changes to exploit this. Signed-off-by: Martin Schwidefsky --- include/linux/kernel_stat.h | 7 ++++++- include/linux/sched.h | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c78a459662a6..570d20413119 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -81,6 +81,11 @@ static inline unsigned int kstat_irqs(unsigned int irq) extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); -extern void account_steal_time(struct task_struct *, cputime_t); +extern void account_steal_time(cputime_t); +extern void account_idle_time(cputime_t); + +extern void account_process_tick(struct task_struct *, int user); +extern void account_steal_ticks(unsigned long ticks); +extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8395e715809d..b475d4db8053 100644 --- 
a/include/linux/sched.h +++ b/include/linux/sched.h @@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout); extern void cpu_init (void); extern void trap_init(void); -extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); -- cgit v1.2.3 From c4abb7c9cde24b7351a47328ef866e6a2bbb1ad0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 26 Sep 2008 09:30:55 +0200 Subject: KVM: x86: Support for user space injected NMIs Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for injecting NMIs from user space and also extends the statistic report accordingly. Based on the original patch by Sheng Yang. Signed-off-by: Jan Kiszka Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f18b86fa8655..44fd7fa0af2b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -83,18 +83,22 @@ struct kvm_irqchip { #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 +#define KVM_EXIT_NMI 16 +#define KVM_EXIT_NMI_WINDOW_OPEN 17 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 request_nmi_window; + __u8 padding1[6]; /* out */ __u32 exit_reason; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 padding2[2]; + __u8 ready_for_nmi_injection; + __u8 padding2; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -387,6 +391,7 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 +#define KVM_CAP_NMI 19 /* * ioctls for VM fds @@ -458,6 +463,8 @@ struct kvm_trace_rec { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +/* 
Available with KVM_CAP_NMI */ +#define KVM_NMI _IO(KVMIO, 0x9a) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -- cgit v1.2.3 From e19e30effac03f5a005a8e42ed941a2a5dc62654 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 20 Oct 2008 16:07:10 +0800 Subject: KVM: IRQ ACK notifier should be used with in-kernel irqchip Also remove unnecessary parameter of unregister irq ack notifier. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bb92be2153bc..3a0fb77d1f6a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -316,8 +316,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -- cgit v1.2.3 From 4f906c19ae29397409bedabf7bbe5cb42ad90332 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 24 Nov 2008 14:32:51 +0800 Subject: KVM: Replace irq_requested with more generic irq_requested_type Separate guest irq type and host irq type, for we can support guest using INTx with host using MSI (but not opposite combination). 
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3a0fb77d1f6a..c3d4b96a08fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,7 +307,9 @@ struct kvm_assigned_dev_kernel { int host_devfn; int host_irq; int guest_irq; - int irq_requested; +#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) +#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) + unsigned long irq_requested_type; int irq_source_id; struct pci_dev *dev; struct kvm *kvm; -- cgit v1.2.3 From 0937c48d075ddd59ae2c12a6fa8308b9c7a63753 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 24 Nov 2008 14:32:53 +0800 Subject: KVM: Add fields for MSI device assignment Prepared for kvm_arch_assigned_device_msi_dispatch(). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 7 +++++++ include/linux/kvm_host.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 44fd7fa0af2b..bb283c388a24 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -507,10 +507,17 @@ struct kvm_assigned_irq { __u32 guest_irq; __u32 flags; union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; __u32 reserved[12]; }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c3d4b96a08fa..8091a4d90ddf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -307,8 +308,11 @@ struct kvm_assigned_dev_kernel { int host_devfn; int host_irq; int guest_irq; + struct msi_msg guest_msi; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) +#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 
8) +#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) unsigned long irq_requested_type; int irq_source_id; struct pci_dev *dev; -- cgit v1.2.3 From 6b9cc7fd469869bed38831c5adac3f59dc25eaf5 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 24 Nov 2008 14:32:56 +0800 Subject: KVM: Enable MSI for device assignment We enable guest MSI and host MSI support in this patch. The userspace want to enable MSI should set KVM_DEV_IRQ_ASSIGN_ENABLE_MSI in the assigned_irq's flag. Function would return -ENOTTY if can't enable MSI, userspace shouldn't set MSI Enable bit when KVM_ASSIGN_IRQ return -ENOTTY with KVM_DEV_IRQ_ASSIGN_ENABLE_MSI. Userspace can tell the support of MSI device from #ifdef KVM_CAP_DEVICE_MSI. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index bb283c388a24..0997e6f5490c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -392,6 +392,9 @@ struct kvm_trace_rec { #endif #define KVM_CAP_IOMMU 18 #define KVM_CAP_NMI 19 +#if defined(CONFIG_X86) +#define KVM_CAP_DEVICE_MSI 20 +#endif /* * ioctls for VM fds -- cgit v1.2.3 From 1a811b6167089bcdb84284f2dc9fd0b4d0f1899d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 8 Dec 2008 18:25:27 +0200 Subject: KVM: Advertise the bug in memory region destruction as fixed Userspace might need to act differently. 
Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0997e6f5490c..48807767e726 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -395,6 +395,8 @@ struct kvm_trace_rec { #if defined(CONFIG_X86) #define KVM_CAP_DEVICE_MSI 20 #endif +/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 /* * ioctls for VM fds -- cgit v1.2.3 From defaf1587c5d7dff828f6f11c8941e5bcef00f50 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 2 Dec 2008 12:16:33 +0000 Subject: KVM: fix handling of ACK from shared guest IRQ If an assigned device shares a guest irq with an emulated device then we currently interpret an ack generated by the emulated device as originating from the assigned device leading to e.g. "Unbalanced enable for IRQ 4347" from the enable_irq() in kvm_assigned_dev_ack_irq(). The fix is fairly simple - don't enable the physical device irq unless it was previously disabled. Of course, this can still lead to a situation where a non-assigned device ACK can cause the physical device irq to be reenabled before the device was serviced. However, being level sensitive, the interrupt will merely be regenerated. 
Signed-off-by: Mark McLoughlin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8091a4d90ddf..eafabd5c66b2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,6 +307,7 @@ struct kvm_assigned_dev_kernel { int host_busnr; int host_devfn; int host_irq; + bool host_irq_disabled; int guest_irq; struct msi_msg guest_msi; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -- cgit v1.2.3 From 4531220b71f0399e71cda0c4cf749e7281a7416a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 11 Dec 2008 16:54:54 +0100 Subject: KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI There is no point in doing the ready_for_nmi_injection/ request_nmi_window dance with user space. First, we don't do this for in-kernel irqchip anyway, while the code path is the same as for user space irqchip mode. And second, there is nothing to lose if a pending NMI is overwritten by another one (in contrast to IRQs where we have to save the number). Actually, there is even the risk of raising spurious NMIs this way because the reason for the held-back NMI might already be handled while processing the first one. Therefore this patch creates a simplified user space NMI injection interface, exporting it under KVM_CAP_USER_NMI and dropping the old KVM_CAP_NMI capability. And this time we also take care to provide the interface only on archs supporting NMIs via KVM (right now only x86).
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 48807767e726..35525ac63337 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -84,21 +84,18 @@ struct kvm_irqchip { #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 #define KVM_EXIT_NMI 16 -#define KVM_EXIT_NMI_WINDOW_OPEN 17 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 request_nmi_window; - __u8 padding1[6]; + __u8 padding1[7]; /* out */ __u32 exit_reason; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 ready_for_nmi_injection; - __u8 padding2; + __u8 padding2[2]; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -391,12 +388,14 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#define KVM_CAP_NMI 19 #if defined(CONFIG_X86) #define KVM_CAP_DEVICE_MSI 20 #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 +#if defined(CONFIG_X86) +#define KVM_CAP_USER_NMI 22 +#endif /* * ioctls for VM fds -- cgit v1.2.3 From b30f8af3358b5c66be223e3a9f3d11b3d02b4a8f Mon Sep 17 00:00:00 2001 From: Jarkko Lavinen Date: Mon, 17 Nov 2008 14:35:21 +0200 Subject: mmc: Add 8-bit bus width support Signed-off-by: Jarkko Lavinen Signed-off-by: Pierre Ossman --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f842f234e44f..4e457256bd33 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -41,6 +41,7 @@ struct mmc_ios { #define MMC_BUS_WIDTH_1 0 #define MMC_BUS_WIDTH_4 2 +#define MMC_BUS_WIDTH_8 3 unsigned char timing; /* timing specification used */ @@ -116,6 +117,7 @@ struct mmc_host { #define MMC_CAP_SDIO_IRQ (1 << 3) /* Can signal 
pending SDIO IRQs */ #define MMC_CAP_SPI (1 << 4) /* Talks only SPI protocols */ #define MMC_CAP_NEEDS_POLL (1 << 5) /* Needs polling for card-detection */ +#define MMC_CAP_8_BIT_DATA (1 << 6) /* Can the host do 8 bit transfers */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v1.2.3 From 86e8286a0e48663e1e86a5884b30a6d05de2993a Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 26 Nov 2008 22:54:17 +0300 Subject: mmc: Add mmc_vddrange_to_ocrmask() helper function This function sets the OCR mask bits according to provided voltage ranges. Will be used by the mmc_spi OpenFirmware bindings. Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman --- include/linux/mmc/core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 143cebf0586f..7ac8b500d55c 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -151,4 +151,6 @@ static inline void mmc_claim_host(struct mmc_host *host) __mmc_claim_host(host, NULL); } +extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); + #endif -- cgit v1.2.3 From 9c43df57910bbba540a6cb5c9132302a9ea5f41a Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 30 Dec 2008 18:15:28 +0300 Subject: mmc_spi: Add support for OpenFirmware bindings The support is implemented via platform data accessors, new module (of_mmc_spi) will be created automatically when the driver compiles on OpenFirmware platforms. Link-time dependency will load the module automatically. 
Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman --- include/linux/spi/mmc_spi.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index a3626aedaec9..0f4eb165f254 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -1,9 +1,10 @@ #ifndef __LINUX_SPI_MMC_SPI_H #define __LINUX_SPI_MMC_SPI_H +#include +#include #include -struct device; struct mmc_host; /* Put this in platform_data of a device being used to manage an MMC/SD @@ -41,4 +42,16 @@ struct mmc_spi_platform_data { void (*setpower)(struct device *, unsigned int maskval); }; +#ifdef CONFIG_OF +extern struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi); +extern void mmc_spi_put_pdata(struct spi_device *spi); +#else +static inline struct mmc_spi_platform_data * +mmc_spi_get_pdata(struct spi_device *spi) +{ + return spi->dev.platform_data; +} +static inline void mmc_spi_put_pdata(struct spi_device *spi) {} +#endif /* CONFIG_OF */ + #endif /* __LINUX_SPI_MMC_SPI_H */ -- cgit v1.2.3 From be6d3e56a6b9b3a4ee44a0685e39e595073c6f0d Mon Sep 17 00:00:00 2001 From: Kentaro Takeda Date: Wed, 17 Dec 2008 13:24:15 +0900 Subject: introduce new LSM hooks where vfsmount is available. Add new LSM hooks for path-based checks. Call them on directory-modifying operations at the points where we still know the vfsmount involved. 
Signed-off-by: Kentaro Takeda Signed-off-by: Tetsuo Handa Signed-off-by: Toshiharu Harada Signed-off-by: Al Viro --- include/linux/security.h | 137 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..b92b5e453f64 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -335,17 +335,37 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dir contains the inode structure of the parent directory of the new link. * @new_dentry contains the dentry structure for the new link. * Return 0 if permission is granted. + * @path_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link + * to the file. + * @new_dir contains the path structure of the parent directory of + * the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. * @inode_unlink: * Check the permission to remove a hard link to a file. * @dir contains the inode structure of parent directory of the file. * @dentry contains the dentry structure for file to be unlinked. * Return 0 if permission is granted. + * @path_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the path structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. * @inode_symlink: * Check the permission to create a symbolic link to a file. * @dir contains the inode structure of parent directory of the symbolic link. * @dentry contains the dentry structure of the symbolic link. * @old_name contains the pathname of file. * Return 0 if permission is granted. + * @path_symlink: + * Check the permission to create a symbolic link to a file. 
+ * @dir contains the path structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. * @inode_mkdir: * Check permissions to create a new directory in the existing directory * associated with inode strcture @dir. @@ -353,11 +373,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dentry contains the dentry structure of new directory. * @mode contains the mode of new directory. * Return 0 if permission is granted. + * @path_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with path strcture @path. + * @dir containst the path structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. * @inode_rmdir: * Check the permission to remove a directory. * @dir contains the inode structure of parent of the directory to be removed. * @dentry contains the dentry structure of directory to be removed. * Return 0 if permission is granted. + * @path_rmdir: + * Check the permission to remove a directory. + * @dir contains the path structure of parent of the directory to be + * removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. * @inode_mknod: * Check permissions when creating a special file (or a socket or a fifo * file created via the mknod system call). Note that if mknod operation @@ -368,6 +402,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @mode contains the mode of the new file. * @dev contains the device number. * Return 0 if permission is granted. + * @path_mknod: + * Check permissions when creating a file. Note that this hook is called + * even if mknod operation is being done for a regular file. 
+ * @dir contains the path structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the undecoded device number. Use new_decode_dev() to get + * the decoded device number. + * Return 0 if permission is granted. * @inode_rename: * Check for permission to rename a file or directory. * @old_dir contains the inode structure for parent of the old link. @@ -375,6 +418,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the inode structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the path structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the path structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -403,6 +453,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dentry contains the dentry structure for the file. * @attr is the iattr structure containing the new file attributes. * Return 0 if permission is granted. + * @path_truncate: + * Check permission before truncating a file. + * @path contains the path structure for the file. + * @length is the new length of the file. + * @time_attrs is the flags passed to do_truncate(). + * Return 0 if permission is granted. * @inode_getattr: * Check permission before obtaining file attributes. 
* @mnt is the vfsmount where the dentry was looked up @@ -1331,6 +1387,22 @@ struct security_operations { struct super_block *newsb); int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); +#ifdef CONFIG_SECURITY_PATH + int (*path_unlink) (struct path *dir, struct dentry *dentry); + int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode); + int (*path_rmdir) (struct path *dir, struct dentry *dentry); + int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode, + unsigned int dev); + int (*path_truncate) (struct path *path, loff_t length, + unsigned int time_attrs); + int (*path_symlink) (struct path *dir, struct dentry *dentry, + const char *old_name); + int (*path_link) (struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry); +#endif + int (*inode_alloc_security) (struct inode *inode); void (*inode_free_security) (struct inode *inode); int (*inode_init_security) (struct inode *inode, struct inode *dir, @@ -2705,6 +2777,71 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_SECURITY_PATH +int security_path_unlink(struct path *dir, struct dentry *dentry); +int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode); +int security_path_rmdir(struct path *dir, struct dentry *dentry); +int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, + unsigned int dev); +int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs); +int security_path_symlink(struct path *dir, struct dentry *dentry, + const char *old_name); +int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); +int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry 
*new_dentry); +#else /* CONFIG_SECURITY_PATH */ +static inline int security_path_unlink(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, + int mode) +{ + return 0; +} + +static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static inline int security_path_mknod(struct path *dir, struct dentry *dentry, + int mode, unsigned int dev) +{ + return 0; +} + +static inline int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs) +{ + return 0; +} + +static inline int security_path_symlink(struct path *dir, struct dentry *dentry, + const char *old_name) +{ + return 0; +} + +static inline int security_path_link(struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry) +{ + return 0; +} + +static inline int security_path_rename(struct path *old_dir, + struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry) +{ + return 0; +} +#endif /* CONFIG_SECURITY_PATH */ + #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY -- cgit v1.2.3 From c2452f32786159ed85f0e4b21fec09258f822fc8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 1 Dec 2008 09:33:43 +0100 Subject: shrink struct dentry struct dentry is one of the most critical structures in the kernel. So it's sad to see it going neglected. With CONFIG_PROFILING turned on (which is probably the common case at least for distros and kernel developers), sizeof(struct dcache) == 208 here (64-bit). This gives 19 objects per slab. I packed d_mounted into a hole, and took another 4 bytes off the inline name length to take the padding out from the end of the structure. This shinks it to 200 bytes. I could have gone the other way and increased the length to 40, but I'm aiming for a magic number, read on... I then got rid of the d_cookie pointer. This shrinks it to 192 bytes. Rant: why was this ever a good idea? 
The cookie system should increase its hash size or use a tree or something if lookups are a problem. Also the "fast dcookie lookups" in oprofile should be moved into the dcookie code -- how can oprofile possibly care about the dcookie_mutex? It gets dropped after get_dcookie() returns so it can't be providing any sort of protection. At 192 bytes, 21 objects fit into a 4K page, saving about 3MB on my system with ~140 000 entries allocated. 192 is also a multiple of 64, so we get nice cacheline alignment on 64 and 32 byte line systems -- any given dentry will now require 3 cachelines to touch all fields whereas previously it would require 4. I know the inline name size was chosen quite carefully, however with the reduction in cacheline footprint, it should actually be just about as fast to do a name lookup for a 36 character name as it was before the patch (and faster for other sizes). The memory footprint savings for names which are <= 32 or > 36 bytes long should more than make up for the memory cost for 33-36 byte names. Performance is a feature... Signed-off-by: Al Viro --- include/linux/dcache.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a37359d0bad1..c66d22487bf8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -75,14 +75,22 @@ full_name_hash(const unsigned char *name, unsigned int len) return end_name_hash(hash); } -struct dcookie_struct; - -#define DNAME_INLINE_LEN_MIN 36 +/* + * Try to keep struct dentry aligned on 64 byte cachelines (this will + * give reasonable cacheline footprint with larger lines without the + * large memory footprint increase).
+ */ +#ifdef CONFIG_64BIT +#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ +#else +#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ +#endif struct dentry { atomic_t d_count; unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ + int d_mounted; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ /* @@ -107,10 +115,7 @@ struct dentry { struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ -#ifdef CONFIG_PROFILING - struct dcookie_struct *d_cookie; /* cookie, if any */ -#endif - int d_mounted; + unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; @@ -177,6 +182,8 @@ d_iput: no no no yes #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ +#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; -- cgit v1.2.3 From dded4f4d5048e64a01cf52eed4d27c8cb2600525 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 1 Dec 2008 14:34:50 -0800 Subject: include: linux/fs.h: put declarations in __KERNEL__ include/linux/fs.h contains externs for a bunch of variables. That obviously belongs under ifdef __KERNEL__. 
Signed-off-by: Jan Engelhardt Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 001ded4845b4..c5e4c5c74034 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -21,7 +21,6 @@ /* Fixed constants first: */ #undef NR_OPEN -extern int sysctl_nr_open; #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 @@ -38,21 +37,13 @@ struct files_stat_struct { int nr_free_files; /* read only */ int max_files; /* tunable */ }; -extern struct files_stat_struct files_stat; -extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; int nr_unused; int dummy[5]; /* padding for sysctl ABI compatibility */ }; -extern struct inodes_stat_t inodes_stat; -extern int leases_enable, lease_break_time; - -#ifdef CONFIG_DNOTIFY -extern int dir_notify_enable; -#endif #define NR_FILE 8192 /* this can well be larger on a larger system */ @@ -330,6 +321,15 @@ extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(unsigned long); +extern struct files_stat_struct files_stat; +extern int get_max_files(void); +extern int sysctl_nr_open; +extern struct inodes_stat_t inodes_stat; +extern int leases_enable, lease_break_time; +#ifdef CONFIG_DNOTIFY +extern int dir_notify_enable; +#endif + struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -- cgit v1.2.3 From 035146851cfa2fe24c1d9dc7637bb009ad06b2f7 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 19 Dec 2008 20:47:11 +0000 Subject: vfs: introduce helper function to safely NUL-terminate symlinks A number of filesystems were potentially triggering kernel bugs due to corrupted symlink names on disk. This function helps safely terminate the names. 
Cc: Al Viro Cc: Andrew Morton Signed-off-by: Duane Griffin Signed-off-by: Al Viro --- include/linux/namei.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 99eb80306dc5..fc2e03579877 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -94,4 +94,9 @@ static inline char *nd_get_link(struct nameidata *nd) return nd->saved_names[nd->depth]; } +static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) +{ + ((char *) name)[min(len, maxlen)] = '\0'; +} + #endif /* _LINUX_NAMEI_H */ -- cgit v1.2.3 From 3fb64190aa3c23c10e6e9fd0124ac030115c99bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2008 09:58:10 +0200 Subject: pass a struct path * to may_open No need for the nameidata in may_open - a struct path is enough. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c5e4c5c74034..3468df5a06e0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1869,7 +1869,7 @@ extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, int open_flag, int mode); -extern int may_open(struct nameidata *, int, int); +extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); extern struct file * open_exec(const char *); -- cgit v1.2.3 From cb23beb55100171646e69e248fb45f10db6e99a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2008 09:59:29 +0200 Subject: kill vfs_permission With all the nameidata removal there's no point anymore for this helper. Of the three callers left two will go away with the next lookup series anyway. Also add proper kerneldoc to inode_permission as this is the main permission check routine now. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3468df5a06e0..fd615986a41c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1212,7 +1212,6 @@ extern void unlock_super(struct super_block *); /* * VFS helper functions.. */ -extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -- cgit v1.2.3 From 18d8fda7c3c9439be04d7ea2e82da2513b121acb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Dec 2008 00:35:37 -0500 Subject: take init_fs to saner place Signed-off-by: Al Viro --- include/linux/fs_struct.h | 6 ------ include/linux/init_task.h | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 9e5a06e78d02..a97c053d3a9a 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -10,12 +10,6 @@ struct fs_struct { struct path root, pwd; }; -#define INIT_FS { \ - .count = ATOMIC_INIT(1), \ - .lock = RW_LOCK_UNLOCKED, \ - .umask = 0022, \ -} - extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 959f5522d10a..2f3c2d4ef73b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include extern struct files_struct init_files; +extern struct fs_struct init_fs; #define INIT_KIOCTX(name, which_mm) \ { \ -- cgit v1.2.3 From b6b3fdead251d432f32f2cfce2a893ab8a658110 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Dec 2008 09:35:45 -0800 Subject: filp_cachep can be static in fs/file_table.c Instead of creating the "filp" kmem_cache in vfs_caches_init(), we can do 
it a little bit later in files_init(), so that filp_cachep is static to fs/file_table.c Acked-by: Paul E. McKenney Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 4aab6f12cfab..09d6c5bbdddd 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -57,8 +57,6 @@ struct files_struct { #define files_fdtable(files) (rcu_dereference((files)->fdt)) -extern struct kmem_cache *filp_cachep; - struct file_operations; struct vfsmount; struct dentry; -- cgit v1.2.3 From 6badd79bd002788aaec27b50a74ab69ef65ab8ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Dec 2008 00:57:40 -0500 Subject: kill ->dir_notify() Remove the hopelessly misguided ->dir_notify(). The only instance (cifs) has been broken by design from the very beginning; the objects it creates are never destroyed, keep references to struct file they can outlive, nothing that could possibly evict them exists on close(2) path *and* no locking whatsoever is done to prevent races with close(), should the previous, er, deficiencies someday be dealt with.
Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fd615986a41c..be16ce01fb1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,7 +1309,6 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -- cgit v1.2.3 From 261bca86ed4f7f391d1938167624e78da61dcc6b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 30 Dec 2008 01:48:21 -0500 Subject: nfsd/create race fixes, infrastructure new helpers - insert_inode_locked() and insert_inode_locked4(). Hash new inode, making sure that there's no such inode in icache already. If there is and it does not end up unhashed (as would happen if we have nfsd trying to resolve a bogus fhandle), fail. Otherwise insert our inode into hash and succeed. In either case have i_state set to new+locked; cleanup ends up being simpler with such calling conventions. 
Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index be16ce01fb1b..e2170ee21e18 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1902,6 +1902,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); +extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); +extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); extern void __iget(struct inode * inode); -- cgit v1.2.3 From ab53d472e785e51fdfc08fc1d66252c1153e6c0f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:19 +1030 Subject: bitmap: find_last_bit() Impact: New API As the name suggests. For the moment everyone uses the generic one. Signed-off-by: Rusty Russell --- include/linux/bitops.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 024f2b027244..61829139795a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -134,9 +134,20 @@ extern unsigned long find_first_bit(const unsigned long *addr, */ extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); - #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifdef CONFIG_GENERIC_FIND_LAST_BIT +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit number of the last set bit, or size.
+ */ +extern unsigned long find_last_bit(const unsigned long *addr, + unsigned long size); +#endif /* CONFIG_GENERIC_FIND_LAST_BIT */ + #ifdef CONFIG_GENERIC_FIND_NEXT_BIT /** -- cgit v1.2.3 From 6b954823c24f04ed026a8517f6bab5abda279db8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:25 +1030 Subject: cpumask: convert kernel time functions Impact: Use new APIs Convert kernel/time functions to use struct cpumask *. Note the ugly bitmap declarations in tick-broadcast.c. These should be cpumask_var_t, but there was no obvious initialization function to put the alloc_cpumask_var() calls in. This was safe. (Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK, so we use a bitmap here to show we really mean it). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- include/linux/tick.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index b6ec8189ac0c..469b82d88b3b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -84,10 +84,10 @@ static inline void tick_cancel_sched_timer(int cpu) { } # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern struct tick_device *tick_get_broadcast_device(void); -extern cpumask_t *tick_get_broadcast_mask(void); +extern struct cpumask *tick_get_broadcast_mask(void); # ifdef CONFIG_TICK_ONESHOT -extern cpumask_t *tick_get_broadcast_oneshot_mask(void); +extern struct cpumask *tick_get_broadcast_oneshot_mask(void); # endif # endif /* BROADCAST */ -- cgit v1.2.3 From d036e67b40f52bdd95392390108defbac7e53837 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:26 +1030 Subject: cpumask: convert kernel/irq Impact: Reduce stack usage, use new cpumask API. ALPHA mod! Main change is that irq_default_affinity becomes a cpumask_var_t, so treat it as a pointer (this affects alpha).
Signed-off-by: Rusty Russell --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index dfaee6bd265b..91f1ef8e5810 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -109,7 +109,7 @@ extern void enable_irq(unsigned int irq); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) -extern cpumask_t irq_default_affinity; +extern cpumask_var_t irq_default_affinity; extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); -- cgit v1.2.3 From bd232f97b30f6bb630efa136a777647545db3039 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:26 +1030 Subject: cpumask: convert RCU implementations Impact: use new cpumask API. rcu_ctrlblk contains a cpumask, and it's highly optimized so I don't want a cpumask_var_t (ie. a pointer) for the CONFIG_CPUMASK_OFFSTACK case. It could use a dangling bitmap, and be allocated in __rcu_init to save memory, but for the moment we use a bitmap. (Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK, so we use a bitmap here to show we really mean it). We remove on-stack cpumasks, using cpumask_var_t for rcu_torture_shuffle_tasks() and for_each_cpu_and in force_quiescent_state(). Signed-off-by: Rusty Russell --- include/linux/rcuclassic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 301dda829e37..f3f697df1d71 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -59,8 +59,8 @@ struct rcu_ctrlblk { int signaled; spinlock_t lock ____cacheline_internodealigned_in_smp; - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. 
*/ + DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */ + /* current batch to proceed. */ } ____cacheline_internodealigned_in_smp; /* Is batch a before batch b ? */ -- cgit v1.2.3 From 41c7bb9588904eb060a95bcad47bd3804a1ece25 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:28 +1030 Subject: cpumask: convert rest of files in kernel/ Impact: Reduce stack usage, use new cpumask API. Mainly changing cpumask_t to 'struct cpumask' and similar simple API conversion. Two conversions worth mentioning: 1) we use cpumask_any_but to avoid a temporary in kernel/softlockup.c, 2) Use cpumask_var_t in taskstats_user_cmd(). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: Balbir Singh Cc: Ingo Molnar --- include/linux/stop_machine.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index faf1519b5adc..74d59a641362 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -23,7 +23,7 @@ * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ -int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus); +int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); /** * __stop_machine: freeze the machine on all CPUs and run this function @@ -34,11 +34,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus); * Description: This is a special version of the above, which assumes cpus * won't come or go while it's being called. Used by hotplug cpu. 
*/ -int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus); +int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); #else static inline int stop_machine(int (*fn)(void *), void *data, - const cpumask_t *cpus) + const struct cpumask *cpus) { int ret; local_irq_disable(); -- cgit v1.2.3 From 8c384cdee3e04d6194a2c2b192b624754f990835 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:30 +1030 Subject: cpumask: CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS Impact: new debug CONFIG options This helps find unconverted code. It currently breaks compile horribly, but we never wanted a flag day so that's expected. Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7c178a6baae3..9f315382610b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -144,6 +144,7 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; extern cpumask_t _unused_cpumask_arg_; +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) { @@ -267,6 +268,7 @@ static inline void __cpus_shift_left(cpumask_t *dstp, { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * @@ -304,6 +306,7 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) return to_cpumask(p); } +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS /* * In cases where we take the address of the cpumask immediately, * gcc optimizes it out (it's a constant) and there's no huge stack @@ -389,19 +392,22 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } +#endif /* 
!CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #if NR_CPUS == 1 #define nr_cpu_ids 1 +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #define first_cpu(src) ({ (void)(src); 0; }) #define next_cpu(n, src) ({ (void)(src); 1; }) #define any_online_cpu(mask) 0 #define for_each_cpu_mask(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) - +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #else /* NR_CPUS > 1 */ extern int nr_cpu_ids; +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS int __first_cpu(const cpumask_t *srcp); int __next_cpu(int n, const cpumask_t *srcp); int __any_online_cpu(const cpumask_t *mask); @@ -413,8 +419,10 @@ int __any_online_cpu(const cpumask_t *mask); for ((cpu) = -1; \ (cpu) = next_cpu((cpu), (mask)), \ (cpu) < NR_CPUS; ) +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #endif +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #if NR_CPUS <= 64 #define next_cpu_nr(n, src) next_cpu(n, src) @@ -432,6 +440,7 @@ int __next_cpu_nr(int n, const cpumask_t *srcp); (cpu) < nr_cpu_ids; ) #endif /* NR_CPUS > 64 */ +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ /* * The following particular system cpumasks and operations manage -- cgit v1.2.3 From ebdab07dad3d3a008e519b0a028e1e1ad5ecaef0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 2 Jan 2009 16:12:48 +0100 Subject: ide: move sysfs support to ide-sysfs.c While at it: - media_string() -> ide_media_string() There should be no functional changes caused by this patch. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e99c56de7f56..62fccaea3110 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1533,6 +1533,7 @@ void ide_unregister_region(struct gendisk *); void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); +int ide_sysfs_register_port(ide_hwif_t *); struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **); void ide_host_free(struct ide_host *); @@ -1627,6 +1628,9 @@ extern struct mutex ide_cfg_mtx; #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) +char *ide_media_string(ide_drive_t *); + +extern struct device_attribute ide_dev_attrs[]; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; -- cgit v1.2.3 From 295f00042aaf6b553b5f37348f89bab463d4a469 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 2 Jan 2009 16:12:48 +0100 Subject: ide: don't execute the next queued command from the hard-IRQ context (v2) * Tell the block layer that we are not done handling requests by using blk_plug_device() in ide_do_request() (request handling function) and ide_timer_expiry() (timeout handler) if the queue is not empty. * Remove optimization which directly calls ide_do_request() for the next queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). * Remove no longer needed IRQ masking from ide_do_request() - in case of IDE ports needing serialization disable_irq_nosync()/enable_irq() was used for the (possibly shared) IRQ of the other IDE port. * Put the misplaced comment in the right place in ide_do_request(). * Drop no longer needed 'int masked_irq' argument from ide_do_request(). * Merge ide_do_request() into do_ide_request(). * Remove no longer needed IDE_NO_IRQ define. 
While at it: * Don't use HWGROUP() macro in do_ide_request(). * Use __func__ in ide_intr(). This patch reduces IRQ handling latency for IDE and improves the system-wide handling of shared IRQs (which should result in more timeout resistant and stable IDE systems). It also makes it possible to do some further changes later (i.e. replace some busy-waiting delays with sleeping equivalents). v2: Changes per review from Elias Oltmanns: - fix wrong goto statement in 'if (startstop == ide_stopped)' block - use spin_unlock_irq() - don't use obsolete HWIF() macro Cc: Elias Oltmanns Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 62fccaea3110..968ca8f60531 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,13 +32,6 @@ # define SUPPORT_VLB_SYNC 1 #endif -/* - * Used to indicate "no IRQ", should be a value that cannot be an IRQ - * number. - */ - -#define IDE_NO_IRQ (-1) - typedef unsigned char byte; /* used everywhere */ /* -- cgit v1.2.3 From 631de3708d595d153e8a510a3608689290f4c0ed Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 2 Jan 2009 16:12:50 +0100 Subject: ide: add ide_[un]lock_hwgroup() helpers Add ide_[un]lock_hwgroup() inline helpers for obtaining exclusive access to the given hwgroup and update the core code accordingly. [ This change besides making code saner results in more efficient use of ide_{get,release}_lock().
] Cc: Michael Schmitz Cc: Geert Uytterhoeven Cc: Elias Oltmanns Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 968ca8f60531..f408d6123f14 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1280,6 +1280,26 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); + +static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup) +{ + if (hwgroup->busy) + return 1; + + hwgroup->busy = 1; + /* for atari only */ + ide_get_lock(ide_intr, hwgroup); + + return 0; +} + +static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup) +{ + /* for atari only */ + ide_release_lock(); + hwgroup->busy = 0; +} + extern void do_ide_request(struct request_queue *); void ide_init_disk(struct gendisk *, ide_drive_t *); -- cgit v1.2.3 From 201bffa46466b4afdf7d29db8eca3fa5decb39c8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 2 Jan 2009 16:12:50 +0100 Subject: ide: use per-device request queue locks (v2) * Move hack for flush requests from choose_drive() to do_ide_request(). * Add ide_plug_device() helper and convert core IDE code from using per-hwgroup lock as a request lock to use the ->queue_lock instead. * Remove no longer needed: - choose_drive() function - WAKEUP() macro - 'sleeping' flag from ide_hwif_t - 'service_{start,time}' fields from ide_drive_t This patch results in much simpler and more maintainable code (besides being a scalability improvement). 
v2: * Fixes/improvements based on review from Elias: - take as many requests off the queue as possible - remove now redundant BUG_ON() Cc: Elias Oltmanns Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f408d6123f14..5f86ad40ee7e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -603,8 +603,6 @@ struct ide_drive_s { unsigned long dev_flags; unsigned long sleep; /* sleep until this time */ - unsigned long service_start; /* time we started last request */ - unsigned long service_time; /* service time of last request */ unsigned long timeout; /* max time to wait for irq */ special_t special; /* special action flags */ @@ -872,8 +870,6 @@ typedef struct hwgroup_s { /* BOOL: protects all fields below */ volatile int busy; - /* BOOL: wake us up on timer expiry */ - unsigned int sleeping : 1; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; -- cgit v1.2.3 From bf64741fe89280bd81a9e3a1beadec1570861848 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:50 +0100 Subject: ide: make IDE_AFLAG_.. numbering continuous again Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 5f86ad40ee7e..eb4c01f7f253 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -473,53 +473,53 @@ enum { /* ide-cd */ /* Drive cannot eject the disc. */ - IDE_AFLAG_NO_EJECT = (1 << 3), + IDE_AFLAG_NO_EJECT = (1 << 1), /* Drive is a pre ATAPI 1.2 drive. */ - IDE_AFLAG_PRE_ATAPI12 = (1 << 4), + IDE_AFLAG_PRE_ATAPI12 = (1 << 2), /* TOC addresses are in BCD. 
*/ - IDE_AFLAG_TOCADDR_AS_BCD = (1 << 5), + IDE_AFLAG_TOCADDR_AS_BCD = (1 << 3), /* TOC track numbers are in BCD. */ - IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 6), + IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 4), /* * Drive does not provide data in multiples of SECTOR_SIZE * when more than one interrupt is needed. */ - IDE_AFLAG_LIMIT_NFRAMES = (1 << 7), + IDE_AFLAG_LIMIT_NFRAMES = (1 << 5), /* Saved TOC information is current. */ - IDE_AFLAG_TOC_VALID = (1 << 9), + IDE_AFLAG_TOC_VALID = (1 << 6), /* We think that the drive door is locked. */ - IDE_AFLAG_DOOR_LOCKED = (1 << 10), + IDE_AFLAG_DOOR_LOCKED = (1 << 7), /* SET_CD_SPEED command is unsupported. */ - IDE_AFLAG_NO_SPEED_SELECT = (1 << 11), - IDE_AFLAG_VERTOS_300_SSD = (1 << 12), - IDE_AFLAG_VERTOS_600_ESD = (1 << 13), - IDE_AFLAG_SANYO_3CD = (1 << 14), - IDE_AFLAG_FULL_CAPS_PAGE = (1 << 15), - IDE_AFLAG_PLAY_AUDIO_OK = (1 << 16), - IDE_AFLAG_LE_SPEED_FIELDS = (1 << 17), + IDE_AFLAG_NO_SPEED_SELECT = (1 << 8), + IDE_AFLAG_VERTOS_300_SSD = (1 << 9), + IDE_AFLAG_VERTOS_600_ESD = (1 << 10), + IDE_AFLAG_SANYO_3CD = (1 << 11), + IDE_AFLAG_FULL_CAPS_PAGE = (1 << 12), + IDE_AFLAG_PLAY_AUDIO_OK = (1 << 13), + IDE_AFLAG_LE_SPEED_FIELDS = (1 << 14), /* ide-floppy */ /* Avoid commands not supported in Clik drive */ - IDE_AFLAG_CLIK_DRIVE = (1 << 19), + IDE_AFLAG_CLIK_DRIVE = (1 << 15), /* Requires BH algorithm for packets */ - IDE_AFLAG_ZIP_DRIVE = (1 << 20), + IDE_AFLAG_ZIP_DRIVE = (1 << 16), /* Supports format progress report */ - IDE_AFLAG_SRFP = (1 << 22), + IDE_AFLAG_SRFP = (1 << 17), /* ide-tape */ - IDE_AFLAG_IGNORE_DSC = (1 << 23), + IDE_AFLAG_IGNORE_DSC = (1 << 18), /* 0 When the tape position is unknown */ - IDE_AFLAG_ADDRESS_VALID = (1 << 24), + IDE_AFLAG_ADDRESS_VALID = (1 << 19), /* Device already opened */ - IDE_AFLAG_BUSY = (1 << 25), + IDE_AFLAG_BUSY = (1 << 20), /* Attempt to auto-detect the current user block size */ - IDE_AFLAG_DETECT_BS = (1 << 26), + IDE_AFLAG_DETECT_BS = (1 << 21), /* Currently on a filemark */ 
- IDE_AFLAG_FILEMARK = (1 << 27), + IDE_AFLAG_FILEMARK = (1 << 22), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 28), + IDE_AFLAG_MEDIUM_PRESENT = (1 << 23), - IDE_AFLAG_NO_AUTOCLOSE = (1 << 29), + IDE_AFLAG_NO_AUTOCLOSE = (1 << 24), }; /* device flags */ -- cgit v1.2.3 From 392de1d53dd40e2eebee3a0a26aa647a3865ca78 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:52 +0100 Subject: ide-atapi: accomodate transfer length calculation for ide-cd ... by factoring it out of ide_cd_do_request() into a helper, as suggested by Bart. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov [bart: BLK_DEV_IDECD needs to select IDE_ATAPI now] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index eb4c01f7f253..e35ff6827897 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1254,6 +1254,8 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc) int ide_scsi_expiry(ide_drive_t *); +int ide_cd_get_xferlen(struct request *); + ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); -- cgit v1.2.3 From 4cad085efbce8dcc5006b0d1034089758b4fc7ba Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:53 +0100 Subject: ide-cd: move cdrom_timer_expiry to ide-atapi.c - cdrom_timer_expiry -> ide_cd_expiry - remove expiry-arg to ide_issue_pc as it is redundant now - ide_debug_log -> debug_log Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e35ff6827897..e20e0b5c1739 100644 --- a/include/linux/ide.h +++ 
b/include/linux/ide.h @@ -396,6 +396,7 @@ enum { * This is used for several packet commands (not for READ/WRITE commands). */ #define IDE_PC_BUFFER_SIZE 256 +#define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { /* actual packet bytes */ @@ -1253,10 +1254,11 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc) } int ide_scsi_expiry(ide_drive_t *); +int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *); +ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); -- cgit v1.2.3 From 5d655a03b847fbe5353a8a74bbeb75e18708dca3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:54 +0100 Subject: ide-atapi: remove ide-scsi remnants from ide_pc_intr() As a result, remove now unused ide_scsi_get_timeout and ide_scsi_expiry. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e20e0b5c1739..257524ee1af2 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1248,12 +1248,6 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); -static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc) -{ - return max_t(unsigned long, WAIT_CMD, pc->timeout - jiffies); -} - -int ide_scsi_expiry(ide_drive_t *); int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v1.2.3 From 5317464dccd0c03026d60f1e9968de4f9cd23f69 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:54 +0100 Subject: ide: remove the last ide-scsi remnants Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- 
include/linux/ide.h | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 257524ee1af2..ad57a4492941 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -559,28 +559,26 @@ enum { IDE_DFLAG_NODMA = (1 << 16), /* powermanagment told us not to do anything, so sleep nicely */ IDE_DFLAG_BLOCKED = (1 << 17), - /* ide-scsi emulation */ - IDE_DFLAG_SCSI = (1 << 18), /* sleeping & sleep field valid */ - IDE_DFLAG_SLEEPING = (1 << 19), - IDE_DFLAG_POST_RESET = (1 << 20), - IDE_DFLAG_UDMA33_WARNED = (1 << 21), - IDE_DFLAG_LBA48 = (1 << 22), + IDE_DFLAG_SLEEPING = (1 << 18), + IDE_DFLAG_POST_RESET = (1 << 19), + IDE_DFLAG_UDMA33_WARNED = (1 << 20), + IDE_DFLAG_LBA48 = (1 << 21), /* status of write cache */ - IDE_DFLAG_WCACHE = (1 << 23), + IDE_DFLAG_WCACHE = (1 << 22), /* used for ignoring ATA_DF */ - IDE_DFLAG_NOWERR = (1 << 24), + IDE_DFLAG_NOWERR = (1 << 23), /* retrying in PIO */ - IDE_DFLAG_DMA_PIO_RETRY = (1 << 25), - IDE_DFLAG_LBA = (1 << 26), + IDE_DFLAG_DMA_PIO_RETRY = (1 << 24), + IDE_DFLAG_LBA = (1 << 25), /* don't unload heads */ - IDE_DFLAG_NO_UNLOAD = (1 << 27), + IDE_DFLAG_NO_UNLOAD = (1 << 26), /* heads unloaded, please don't reset port */ - IDE_DFLAG_PARKED = (1 << 28), - IDE_DFLAG_MEDIA_CHANGED = (1 << 29), + IDE_DFLAG_PARKED = (1 << 27), + IDE_DFLAG_MEDIA_CHANGED = (1 << 28), /* write protect */ - IDE_DFLAG_WP = (1 << 30), - IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 31), + IDE_DFLAG_WP = (1 << 29), + IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 30), }; struct ide_drive_s { -- cgit v1.2.3 From 28ad91db77755f1c49d79652de11b28ee2cfbf03 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Jan 2009 16:12:56 +0100 Subject: ide-atapi: remove timeout arg to ide_issue_pc There should be no functionality change resulting from this patch. 
Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ad57a4492941..db5ef8ae1ab9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1250,7 +1250,7 @@ int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int); +ide_startstop_t ide_issue_pc(ide_drive_t *); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); -- cgit v1.2.3 From 56c451f4b583ccdf80c9e676179c9cb49de86745 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:37 +0900 Subject: [SCSI] block: fix the partial mappings with struct rq_map_data This fixes bio_copy_user_iov to properly handle the partial mappings with struct rq_map_data (which only sg uses for now but st and osst will shortly). It adds the offset member to struct rq_map_data and changes blk_rq_map_user to update it so that bio_copy_user_iov can add an appropriate page frame via bio_add_pc_page(). 
Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7035cec583b6..811e5342c452 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -690,6 +690,7 @@ struct rq_map_data { struct page **pages; int page_order; int nr_entries; + unsigned long offset; }; struct req_iterator { -- cgit v1.2.3 From 97ae77a1cd332c7b011d71315c8faabce6840c72 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 18 Dec 2008 14:49:38 +0900 Subject: [SCSI] block: make blk_rq_map_user take a NULL user-space buffer for WRITE The commit 818827669d85b84241696ffef2de485db46b0b5e (block: make blk_rq_map_user take a NULL user-space buffer) extended blk_rq_map_user to accept a NULL user-space buffer with a READ command. It was necessary to convert sg to use the block layer mapping API. This patch extends blk_rq_map_user again for a WRITE command. It is necessary to convert st and osst drivers to use the block layer mapping API.
Signed-off-by: FUJITA Tomonori Acked-by: Jens Axboe Signed-off-by: James Bottomley --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 811e5342c452..044467ef7b11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -691,6 +691,7 @@ struct rq_map_data { int page_order; int nr_entries; unsigned long offset; + int null_mapped; }; struct req_iterator { -- cgit v1.2.3 From f153b82121b0366fe0e5f9553545cce237335175 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Jan 2009 09:23:03 -0800 Subject: Sanitize gcc version header includes - include the gcc version-dependent header files from the generic gcc header file, rather than the other way around (iow: don't make the non-gcc header file have to know about gcc versions) - don't include compiler-gcc4.h for gcc 5 (for whenever it gets released). That's just confusing and made us do odd things in the gcc4 header file (testing that we really had version 4!) - generate the name from the __GNUC__ version directly, rather than having a mess of #if conditionals. 
Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 5 +++++ include/linux/compiler-gcc3.h | 3 --- include/linux/compiler-gcc4.h | 5 +---- include/linux/compiler.h | 8 ++------ 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5c8351b859f0..af40f8eb86f0 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -61,3 +61,8 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) + +#define __gcc_header(x) #x +#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) +#define gcc_header(x) _gcc_header(x) +#include gcc_header(__GNUC__) diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index e5eb795f78a1..2befe6513ce4 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -2,9 +2,6 @@ #error "Please don't include directly, include instead." #endif -/* These definitions are for GCC v3.x. */ -#include - #if __GNUC_MINOR__ >= 3 # define __used __attribute__((__used__)) #else diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 974f5b7bb205..aa426214331b 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -2,9 +2,6 @@ #error "Please don't include directly, include instead." #endif -/* These definitions are for GCC v4.x. */ -#include - #define __used __attribute__((__used__)) #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) @@ -16,7 +13,7 @@ */ #define uninitialized_var(x) x = x -#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3) +#if __GNUC_MINOR__ >= 3 /* Mark functions as cold. gcc will assume any path leading to a call to them will be unlikely. 
This means a lot of manual unlikely()s are unnecessary now for any paths leading to the usual suspects diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ea7c6be354b7..d95da1020f1c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -36,12 +36,8 @@ extern void __chk_io_ptr(const volatile void __iomem *); #ifdef __KERNEL__ -#if __GNUC__ >= 4 -# include -#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2 -# include -#else -# error Sorry, your compiler is too old/not recognized. +#ifdef __GNUC__ +#include #endif #define notrace __attribute__((no_instrument_function)) -- cgit v1.2.3 From f9d14250071eda9972e4c9cea745a11185952114 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Jan 2009 09:29:43 -0800 Subject: Disallow gcc versions 4.1.{0,1} These compiler versions are known to miscompile __weak functions and thus generate kernels that don't necessarily work correctly. If a weak function is in the same compilation unit as a caller, gcc may end up inlining it, and thus binding the weak function too early. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27781 for details. Cc: Adrian Bunk Cc: Helge Deller Cc: Rusty Russell Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index aa426214331b..09992718f9e8 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -2,6 +2,11 @@ #error "Please don't include directly, include instead."
#endif +/* GCC 4.1.[01] miscompiles __weak */ +#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1 +# error Your version of gcc miscompiles the __weak directive +#endif + #define __used __attribute__((__used__)) #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) -- cgit v1.2.3 From a88a69c91256418c5907c2f1f8a0ec0a36f9e6cc Mon Sep 17 00:00:00 2001 From: Joe Peterson Date: Fri, 2 Jan 2009 13:40:53 +0000 Subject: n_tty: Fix loss of echoed characters and remove bkl from n_tty Fixes the loss of echoed (and other ldisc-generated characters) when the tty is stopped or when the driver output buffer is full (happens frequently for input during continuous program output, such as ^C) and removes the Big Kernel Lock from the N_TTY line discipline. Adds an "echo buffer" to the N_TTY line discipline that handles all ldisc-generated output (including echoed characters). Along with the loss of characters, this also fixes the associated loss of sync between tty output and the ldisc state when characters cannot be immediately written to the tty driver. The echo buffer stores (in addition to characters) state operations that need to be done at the time of character output (like management of the column position). This allows echo to cooperate correctly with program output, since the ldisc state remains consistent with actual characters written. Since the echo buffer code now isolates the tty column state code to the process_out* and process_echoes functions, we can remove the Big Kernel Lock (BKL) and replace it with mutex locks. Highlights are: * Handles echo (and other ldisc output) when tty driver buffer is full - continuous program output can block echo * Saves echo when tty is in stopped state (e.g. ^S) - (e.g.: ^Q will correctly cause held characters to be released for output) * Control character pairs (e.g. 
"^C") are treated atomically and not split up by interleaved program output * Line discipline state is kept consistent with characters sent to the tty driver * Remove the big kernel lock (BKL) from N_TTY line discipline Signed-off-by: Joe Peterson Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 3f4954c55e53..dfc77ded198a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -253,6 +253,7 @@ struct tty_struct { unsigned int column; unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; unsigned char closing:1; + unsigned char echo_overrun:1; unsigned short minimum_to_wake; unsigned long overrun_time; int num_overrun; @@ -262,11 +263,16 @@ struct tty_struct { int read_tail; int read_cnt; unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))]; + unsigned char *echo_buf; + unsigned int echo_pos; + unsigned int echo_cnt; int canon_data; unsigned long canon_head; unsigned int canon_column; struct mutex atomic_read_lock; struct mutex atomic_write_lock; + struct mutex output_lock; + struct mutex echo_lock; unsigned char *write_buf; int write_cnt; spinlock_t read_lock; -- cgit v1.2.3 From fc6f6238226e6d1248e1967eae2bf556eaf3ac17 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:43:17 +0000 Subject: pty: simplify resize We have special case logic for resizing pty/tty pairs. We also have a per driver resize method so for the pty case we should use it. 
Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 3 +-- include/linux/tty_driver.h | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index dfc77ded198a..f88169787a5f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -360,8 +360,7 @@ extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); -extern int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws); +extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 78416b901589..08e088334dba 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -196,8 +196,7 @@ * Optional: If not provided then the write method is called under * the atomic write lock to keep it serialized with the ldisc. * - * int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty, - * unsigned int rows, unsigned int cols); + * int (*resize)(struct tty_struct *tty, struct winsize *ws) * * Called when a termios request is issued which changes the * requested terminal geometry. 
@@ -258,8 +257,7 @@ struct tty_operations { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); - int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws); + int (*resize)(struct tty_struct *tty, struct winsize *ws); int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); -- cgit v1.2.3 From 975a1a7d887048d4afc9201383e11b7af991866b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Jan 2009 13:44:27 +0000 Subject: And here's a patch (to be applied on top of the last) which prevents this happening again by making use of 'const'. Signed-off-by: Russell King Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/8250_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 3209dd46ea7d..b24ff086a662 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -31,7 +31,7 @@ struct pciserial_board { struct serial_private; struct serial_private * -pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); +pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board); void pciserial_remove_ports(struct serial_private *priv); void pciserial_suspend_ports(struct serial_private *priv); void pciserial_resume_ports(struct serial_private *priv); -- cgit v1.2.3 From c9b3976e3fec266be25c5001a70aa0a890b6c476 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:44:56 +0000 Subject: tty: Fix PPP hang under load Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index f88169787a5f..bbbeaef99626 100644 --- 
a/include/linux/tty.h +++ b/include/linux/tty.h @@ -301,6 +301,7 @@ struct tty_struct { #define TTY_PUSH 6 /* n_tty private */ #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ +#define TTY_LDISC_CHANGING 10 /* Line discipline changing */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ -- cgit v1.2.3 From 31f35939d1d9bcfb3099b32c67b896d2792603f9 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:45:05 +0000 Subject: tty_port: Add a port level carrier detect operation This is the first step to generalising the various pieces of waiting logic duplicated in all sorts of serial drivers. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/generic_serial.h | 1 - include/linux/tty.h | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h index 4cc913939817..fadff28505bb 100644 --- a/include/linux/generic_serial.h +++ b/include/linux/generic_serial.h @@ -21,7 +21,6 @@ struct real_driver { void (*enable_tx_interrupts) (void *); void (*disable_rx_interrupts) (void *); void (*enable_rx_interrupts) (void *); - int (*get_CD) (void *); void (*shutdown_port) (void*); int (*set_real_termios) (void*); int (*chars_in_buffer) (void*); diff --git a/include/linux/tty.h b/include/linux/tty.h index bbbeaef99626..bc7bae78e22f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -180,8 +180,16 @@ struct signal_struct; * until a hangup so don't use the wrong path. 
*/ +struct tty_port; + +struct tty_port_operations { + /* Return 1 if the carrier is raised */ + int (*carrier_raised)(struct tty_port *port); +}; + struct tty_port { struct tty_struct *tty; /* Back pointer */ + const struct tty_port_operations *ops; /* Port operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ @@ -427,6 +435,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); +extern int tty_port_carrier_raised(struct tty_port *port); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -- cgit v1.2.3 From 5d951fb458f847e5485b5251597fbf326000bb3b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:45:19 +0000 Subject: tty: Pull the dtr raise into tty port This moves another per device special out of what should be shared open wait paths into private methods Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index bc7bae78e22f..5001bbcacff6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,6 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); + void (*raise_dtr_rts)(struct tty_port *port); }; struct tty_port { @@ -436,6 +437,7 @@ extern void tty_port_free_xmit_buf(struct tty_port *port); extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); +extern void tty_port_raise_dtr_rts(struct tty_port *port); 
extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -- cgit v1.2.3 From 3e61696bdc2103107674b06d0daf30b76193e922 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:45:26 +0000 Subject: isicom: redo locking to use tty port locks This helps set the basis for moving block_til_ready into common code. We also introduce a tty_port_hangup helper as this will also be generally needed. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5001bbcacff6..a1a93140e6e4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_hangup(struct tty_port *port); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -- cgit v1.2.3 From 36c621d82b956ff6ff72273f848af53e6c581aba Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:46:10 +0000 Subject: tty: Introduce a tty_port generic block_til_ready Start sucking more commonality out of the drivers into a single piece of core code. 
Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/tty.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a1a93140e6e4..61a0ab32cf11 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -439,6 +439,8 @@ extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); +extern int tty_port_block_til_ready(struct tty_port *port, + struct tty_struct *tty, struct file *filp); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -- cgit v1.2.3 From 2a6eadbd5a2ae8f458e421f3614f1ad13c0f9a1c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:46:18 +0000 Subject: tty: Rework istallion to use the tty port changes Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/istallion.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/istallion.h b/include/linux/istallion.h index 0d1840723249..053d5aea925c 100644 --- a/include/linux/istallion.h +++ b/include/linux/istallion.h @@ -61,7 +61,6 @@ struct stliport { int custom_divisor; int close_delay; int closing_wait; - int openwaitcnt; int rc; int argsize; void *argp; -- cgit v1.2.3 From a6614999e800cf3a134ce93ea46ef837e3c0e76e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:46:50 +0000 Subject: tty: Introduce some close helpers for ports Again this is a lot of common code we can unify Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/istallion.h | 1 - include/linux/tty.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/istallion.h b/include/linux/istallion.h index 053d5aea925c..7faca98c7d14 100644 --- 
a/include/linux/istallion.h +++ b/include/linux/istallion.h @@ -59,7 +59,6 @@ struct stliport { unsigned int devnr; int baud_base; int custom_divisor; - int close_delay; int closing_wait; int rc; int argsize; diff --git a/include/linux/tty.h b/include/linux/tty.h index 61a0ab32cf11..fc39db95499f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -441,6 +441,9 @@ extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); +extern int tty_port_close_start(struct tty_port *port, + struct tty_struct *tty, struct file *filp); +extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -- cgit v1.2.3 From 39aced68d664291db3324d0fcf0985ab5626aac2 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Fri, 2 Jan 2009 13:46:58 +0000 Subject: serial: set correct baud_base for Oxford Semiconductor Ltd EXSYS EX-41092 Dual 16950 Serial adapter The PCI-card identified as "Oxford Semiconductor Ltd EXSYS EX-41092 Dual 16950 Serial adapter" is only usable with other devices (i.e. not the same card) after doing a "setserial /dev/ttyS baud_base 115200". This baud_base should be default for this card. 
Signed-off-by: Niels de Vos Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b6e694454280..fa83dfefc5e0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1766,6 +1766,7 @@ #define PCI_DEVICE_ID_SIIG_8S_20x_650 0x2081 #define PCI_DEVICE_ID_SIIG_8S_20x_850 0x2082 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL 0x2050 +#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL 0x2530 #define PCI_VENDOR_ID_RADISYS 0x1331 -- cgit v1.2.3 From 60c20fb8c00a2b23308ae4517f145383bc66d291 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Fri, 2 Jan 2009 13:49:04 +0000 Subject: serial: RS485 ioctl structure uses __u32 include linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the commit below a new struct serial_rs485 was introduced for a new ioctl: commit c26c56c0f40e200e61d1390629c806f6adaffbcc Author: Alan Cox Date: Mon Oct 13 10:37:48 2008 +0100 tty: Cris has a nice RS485 ioctl so we should steal it This structure uses the __u32 types for some of its members, which leads to the following compile error: $ cc -I.../include -c X.c In file included from X.c:2: .../include/linux/serial.h:185: error: expected specifier-qualifier-list before ‘__u32’ $ It seems that these types are appropriate for this structure as it is to be exposed to userspace. These types are available via linux/types.h so move the include of that outside the __KERNEL__ section. 
Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ea8d9265bf6..9136cc5608c3 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -10,8 +10,9 @@ #ifndef _LINUX_SERIAL_H #define _LINUX_SERIAL_H -#ifdef __KERNEL__ #include <linux/types.h> + +#ifdef __KERNEL__ #include <asm/page.h> /* -- cgit v1.2.3 From f751928e0ddf54ea4fe5546f35e99efc5b5d9938 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Jan 2009 13:49:21 +0000 Subject: tty: We want the port object to be persistent Move the tty_port and uart_info bits around a little. By embedding the uart_info into the uart_port we get rid of lots of corner case testing and also get the ability to go port<->state<->info which is a bit more elegant than the current data structures. Downsides - we allocate a tiny bit more memory for unused ports, upside we've removed as much code as it saved for most users.. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 62 +++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index feb3b939ec4b..2395969faa04 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -315,36 +315,14 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; -/* - * This is the state information which is persistent across opens. - * The low level driver must not to touch any elements contained - * within.
- */ -struct uart_state { - unsigned int close_delay; /* msec */ - unsigned int closing_wait; /* msec */ - -#define USF_CLOSING_WAIT_INF (0) -#define USF_CLOSING_WAIT_NONE (~0U) - - int count; - int pm_state; - struct uart_info *info; - struct uart_port *port; - - struct mutex mutex; -}; - -#define UART_XMIT_SIZE PAGE_SIZE - -typedef unsigned int __bitwise__ uif_t; - /* * This is the state information which is only valid when the port - * is open; it may be freed by the core driver once the device has + * is open; it may be cleared the core driver once the device has * been closed. Either the low level driver or the core can modify * stuff here. */ +typedef unsigned int __bitwise__ uif_t; + struct uart_info { struct tty_port port; struct circ_buf xmit; @@ -366,6 +344,29 @@ struct uart_info { wait_queue_head_t delta_msr_wait; }; +/* + * This is the state information which is persistent across opens. + * The low level driver must not to touch any elements contained + * within. + */ +struct uart_state { + unsigned int close_delay; /* msec */ + unsigned int closing_wait; /* msec */ + +#define USF_CLOSING_WAIT_INF (0) +#define USF_CLOSING_WAIT_NONE (~0U) + + int count; + int pm_state; + struct uart_info info; + struct uart_port *port; + + struct mutex mutex; +}; + +#define UART_XMIT_SIZE PAGE_SIZE + + /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 @@ -439,8 +440,13 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port); #define uart_circ_chars_free(circ) \ (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) -#define uart_tx_stopped(portp) \ - ((portp)->info->port.tty->stopped || (portp)->info->port.tty->hw_stopped) +static inline int uart_tx_stopped(struct uart_port *port) +{ + struct tty_struct *tty = port->info->port.tty; + if(tty->stopped || tty->hw_stopped) + return 1; + return 0; +} /* * The following are helper functions for the low level drivers. 
@@ -451,7 +457,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) #ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, port->info ? port->info->port.tty : NULL); + handle_sysrq(ch, port->info->port.tty); port->sysrq = 0; return 1; } -- cgit v1.2.3 From 7d6a07d123b62bf4fa71867420c23da3ca36c995 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 2 Jan 2009 13:49:47 +0000 Subject: 8250: Serial driver changes to support future Cavium OCTEON serial patches. In order to use Cavium OCTEON specific serial i/o drivers, we first patch the 8250 driver to use replaceable I/O functions. Compatible I/O functions are added for existing iotypeS. An added benefit of this change is that it makes it easy to factor some of the existing special cases out to board/SOC specific support code. The alternative is to load up 8250.c with a bunch of OCTEON specific iotype code and bug work-arounds. Signed-off-by: David Daney Signed-off-by: Tomaso Paoletti Signed-off-by: Andrew Morton Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_8250.h | 2 ++ include/linux/serial_core.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 3d37c94abbc8..77d83d929f2c 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -28,6 +28,8 @@ struct plat_serial8250_port { unsigned char iotype; /* UPIO_* */ unsigned char hub6; upf_t flags; /* UPF_* flags */ + unsigned int (*serial_in)(struct uart_port *, int); + void (*serial_out)(struct uart_port *, int, int); }; /* diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2395969faa04..60061f44f3d8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -248,6 +248,8 @@ struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] 
*/ + unsigned int (*serial_in)(struct uart_port *, int); + void (*serial_out)(struct uart_port *, int, int); unsigned int irq; /* irq number */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ -- cgit v1.2.3 From 8e23fcc89c8091790903927449f8efb9b4e23960 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 2 Jan 2009 13:49:54 +0000 Subject: Serial: Allow port type to be specified when calling serial8250_register_port. Add flag value UPF_FIXED_TYPE which specifies that the UART type is known and should not be probed. For this case the UARTs properties are just copied out of the uart_config entry. This allows us to keep SOC specific 8250 probe code out of 8250.c. In this case we know the serial hardware will not be changing as it is on the same silicon as the CPU, and we can specify it with certainty in the board/cpu setup code. The alternative is to load up 8250.c with a bunch of OCTEON specific special cases in the probing code. Signed-off-by: David Daney Signed-off-by: Andrew Morton Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_8250.h | 1 + include/linux/serial_core.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 77d83d929f2c..d4d2a78ad43e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -28,6 +28,7 @@ struct plat_serial8250_port { unsigned char iotype; /* UPIO_* */ unsigned char hub6; upf_t flags; /* UPF_* flags */ + unsigned int type; /* If UPF_FIXED_TYPE */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); }; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 60061f44f3d8..f155252f148c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -295,6 +295,8 @@ struct uart_port { #define UPF_MAGIC_MULTIPLIER ((__force upf_t) (1 << 16)) #define UPF_CONS_FLOW ((__force 
upf_t) (1 << 23)) #define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) +/* The exact UART type is known and should not be probed. */ +#define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) -- cgit v1.2.3 From 6b06f19151c335ee0c5b61839fa4e6838182ebb8 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 2 Jan 2009 13:50:00 +0000 Subject: Serial: UART driver changes for Cavium OCTEON. Cavium UART implementation is not covered by existing uart_configS. Define a new uart_config (PORT_OCTEON) which is specified by OCTEON platform device registration code. Signed-off-by: Tomaso Paoletti Signed-off-by: David Daney Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index f155252f148c..b4199841f1fc 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -40,7 +40,8 @@ #define PORT_NS16550A 14 #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ -#define PORT_MAX_8250 16 /* max port ID */ +#define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ +#define PORT_MAX_8250 17 /* max port ID */ /* * ARM specific type numbers. 
These are not currently guaranteed -- cgit v1.2.3 From e65f0f8271b1b0452334e5da37fd35413a000de4 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 2 Jan 2009 13:50:43 +0000 Subject: serial_8250: support for Sealevel Systems Model 7803 COMM+8 Add support for Sealevel Systems Model 7803 COMM+8 Signed-off-by: Flavio Leitner Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index fa83dfefc5e0..218c73b1e6d4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1796,6 +1796,7 @@ #define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202 #define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401 #define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801 +#define PCI_DEVICE_ID_SEALEVEL_7803 0x7803 #define PCI_DEVICE_ID_SEALEVEL_UCOMM8 0x7804 #define PCI_VENDOR_ID_HYPERCOPE 0x1365 -- cgit v1.2.3 From aecde8b53b8ee1330a5a8206200f0d6b8845a6e0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 30 Dec 2008 07:14:19 -0300 Subject: V4L/DVB (10141): v4l2: debugging API changed to match against driver name instead of ID. Since the i2c driver ID will be removed in the near future we have to modify the v4l2 debugging API to use the driver name instead of driver ID. Note that this API is not used in applications other than v4l2-dbg.cpp as it is for debugging and testing only. Should anyone use the old VIDIOC_G_CHIP_IDENT, then this will be logged with a warning that it is deprecated and will be removed in 2.6.30. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 51 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 1f126e30766c..5571dbe1c0ad 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1370,25 +1370,41 @@ struct v4l2_streamparm { /* * A D V A N C E D D E B U G G I N G * - * NOTE: EXPERIMENTAL API + * NOTE: EXPERIMENTAL API, NEVER RELY ON THIS IN APPLICATIONS! + * FOR DEBUGGING, TESTING AND INTERNAL USE ONLY! */ /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */ #define V4L2_CHIP_MATCH_HOST 0 /* Match against chip ID on host (0 for the host) */ -#define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver ID */ +#define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver name */ #define V4L2_CHIP_MATCH_I2C_ADDR 2 /* Match against I2C 7-bit address */ #define V4L2_CHIP_MATCH_AC97 3 /* Match against anciliary AC97 chip */ -struct v4l2_register { - __u32 match_type; /* Match type */ - __u32 match_chip; /* Match this chip, meaning determined by match_type */ +struct v4l2_dbg_match { + __u32 type; /* Match type */ + union { /* Match this chip, meaning determined by type */ + __u32 addr; + char name[32]; + }; +} __attribute__ ((packed)); + +struct v4l2_dbg_register { + struct v4l2_dbg_match match; + __u32 size; /* register size in bytes */ __u64 reg; __u64 val; -}; +} __attribute__ ((packed)); + +/* VIDIOC_DBG_G_CHIP_IDENT */ +struct v4l2_dbg_chip_ident { + struct v4l2_dbg_match match; + __u32 ident; /* chip identifier as specified in <media/v4l2-chip-ident.h> */ + __u32 revision; /* chip revision, chip specific */ +} __attribute__ ((packed)); -/* VIDIOC_G_CHIP_IDENT */ -struct v4l2_chip_ident { +/* VIDIOC_G_CHIP_IDENT_OLD: Deprecated, do not use */ +struct v4l2_chip_ident_old { __u32 match_type; /* Match type */ __u32 match_chip; /* Match this chip, meaning determined by
match_type */ __u32 ident; /* chip identifier as specified in <media/v4l2-chip-ident.h> */ @@ -1460,13 +1476,22 @@ struct v4l2_chip_ident { #define VIDIOC_G_ENC_INDEX _IOR('V', 76, struct v4l2_enc_idx) #define VIDIOC_ENCODER_CMD _IOWR('V', 77, struct v4l2_encoder_cmd) #define VIDIOC_TRY_ENCODER_CMD _IOWR('V', 78, struct v4l2_encoder_cmd) +#endif -/* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */ -#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_register) -#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_register) - -#define VIDIOC_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_chip_ident) +#if 1 +/* Experimental, meant for debugging, testing and internal use. + Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined. + You must be root to use these ioctls. Never use these in applications! */ +#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_dbg_register) +#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_dbg_register) + +/* Experimental, meant for debugging, testing and internal use. + Never use this ioctl in applications! */ +#define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident) +/* This is deprecated and will go away in 2.6.30 */ +#define VIDIOC_G_CHIP_IDENT_OLD _IOWR('V', 81, struct v4l2_chip_ident_old) #endif + #define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) /* Reminder: when adding new ioctls please add support for them to drivers/media/video/v4l2-compat-ioctl32.c as well! */ -- cgit v1.2.3 From cb889a2f3515b140bef193cf6ffcdb099349b8aa Mon Sep 17 00:00:00 2001 From: Klaus Schmidinger Date: Wed, 31 Dec 2008 14:11:23 -0300 Subject: V4L/DVB (10164): Add missing S2 caps flag to S2API The attached patch adds a capability flag that allows an application to determine whether a particular device can handle "second generation modulation" transponders.
This is necessary in order for applications to be able to decide which device to use for a given channel in a multi device environment, where DVB-S and DVB-S2 devices are mixed. It is assumed that a device capable of handling "second generation modulation" can implicitly handle "first generation modulation". The flag is not named anything with DVBS2 in order to allow its use with future DVBT2 devices as well (should they ever come). Signed-off-by: Klaus Schmidinger Acked-by: Steven Toth Signed-off-by: Mauro Carvalho Chehab --- include/linux/dvb/frontend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 79a8ed8e6a7d..926d28d526e7 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -63,6 +63,7 @@ typedef enum fe_caps { FE_CAN_8VSB = 0x200000, FE_CAN_16VSB = 0x400000, FE_HAS_EXTENDED_CAPS = 0x800000, // We need more bitspace for newer APIs, indicate this. + FE_CAN_2G_MODULATION = 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2) FE_NEEDS_BENDING = 0x20000000, // not supported anymore, don't use (frontend requires frequency bending) FE_CAN_RECOVER = 0x40000000, // frontend can recover from a cable unplug automatically FE_CAN_MUTE_TS = 0x80000000 // frontend can stop spurious TS data output -- cgit v1.2.3 From e4cda3e0728156c6be1d03e72ef20ea811da4ad5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 31 Dec 2008 14:26:57 -0300 Subject: V4L/DVB (10166): dvb frontend: stop using non-C99 compliant comments Signed-off-by: Mauro Carvalho Chehab --- include/linux/dvb/frontend.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 926d28d526e7..55026b1a40bd 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -62,11 +62,11 @@ typedef enum fe_caps {
FE_CAN_HIERARCHY_AUTO = 0x100000, FE_CAN_8VSB = 0x200000, FE_CAN_16VSB = 0x400000, - FE_HAS_EXTENDED_CAPS = 0x800000, // We need more bitspace for newer APIs, indicate this. - FE_CAN_2G_MODULATION = 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2) - FE_NEEDS_BENDING = 0x20000000, // not supported anymore, don't use (frontend requires frequency bending) - FE_CAN_RECOVER = 0x40000000, // frontend can recover from a cable unplug automatically - FE_CAN_MUTE_TS = 0x80000000 // frontend can stop spurious TS data output + FE_HAS_EXTENDED_CAPS = 0x800000, /* We need more bitspace for newer APIs, indicate this. */ + FE_CAN_2G_MODULATION = 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */ + FE_NEEDS_BENDING = 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */ + FE_CAN_RECOVER = 0x40000000, /* frontend can recover from a cable unplug automatically */ + FE_CAN_MUTE_TS = 0x80000000 /* frontend can stop spurious TS data output */ } fe_caps_t; @@ -122,15 +122,15 @@ typedef enum fe_sec_mini_cmd { typedef enum fe_status { - FE_HAS_SIGNAL = 0x01, /* found something above the noise level */ - FE_HAS_CARRIER = 0x02, /* found a DVB signal */ - FE_HAS_VITERBI = 0x04, /* FEC is stable */ - FE_HAS_SYNC = 0x08, /* found sync bytes */ - FE_HAS_LOCK = 0x10, /* everything's working... */ - FE_TIMEDOUT = 0x20, /* no lock within the last ~2 seconds */ - FE_REINIT = 0x40 /* frontend was reinitialized, */ -} fe_status_t; /* application is recommended to reset */ - /* DiSEqC, tone and parameters */ + FE_HAS_SIGNAL = 0x01, /* found something above the noise level */ + FE_HAS_CARRIER = 0x02, /* found a DVB signal */ + FE_HAS_VITERBI = 0x04, /* FEC is stable */ + FE_HAS_SYNC = 0x08, /* found sync bytes */ + FE_HAS_LOCK = 0x10, /* everything's working... 
*/ + FE_TIMEDOUT = 0x20, /* no lock within the last ~2 seconds */ + FE_REINIT = 0x40 /* frontend was reinitialized, */ +} fe_status_t; /* application is recommended to reset */ + /* DiSEqC, tone and parameters */ typedef enum fe_spectral_inversion { INVERSION_OFF, -- cgit v1.2.3 From 6680598b44ed3c0052d155522eb21fc5a00de5f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 2 Jan 2009 18:53:14 +0100 Subject: Disallow gcc versions 3.{0,1} GCC 3.0 and 3.1 are too old to build a working kernel. Signed-off-by: Ingo Molnar [ This check got dropped as obsolete when I simplified the gcc header inclusion mess in f153b82121b0366fe0e5f9553545cce237335175, but Willy Tarreau reports actually having those old versions still.. -Linus ] Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc3.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index 2befe6513ce4..8005effc04f1 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -2,6 +2,10 @@ #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead." #endif +#if __GNUC_MINOR__ < 2 +# error Sorry, your compiler is too old - please upgrade it. +#endif + #if __GNUC_MINOR__ >= 3 # define __used __attribute__((__used__)) #else -- cgit v1.2.3 From 015ab17dc2e9de805c26e74f498b12ee5e8de07e Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 14:04:20 +0000 Subject: intel-iommu: remove some unused struct intel_iommu fields The seg, saved_msg and sysdev fields appear to be unused since before the code was first merged. linux/msi.h is not needed in linux/intel-iommu.h anymore since there is no longer a reference to struct msi_msg. The MSI code in drivers/pci/intel-iommu.c still has linux/msi.h included via linux/dmar.h. linux/sysdev.h isn't needed because there is no reference to struct sys_device.
Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 3d017cfd245b..1bff7bf1bc2c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -23,8 +23,6 @@ #define _INTEL_IOMMU_H_ #include -#include -#include #include #include #include @@ -289,7 +287,6 @@ struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ @@ -302,8 +299,6 @@ struct intel_iommu { unsigned int irq; unsigned char name[7]; /* Device Name */ - struct msi_msg saved_msg; - struct sys_device sysdev; struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit v1.2.3 From 519a05491586dad04e687660e54c57882315b22b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 14:21:13 +0000 Subject: intel-iommu: make init_dmars() static init_dmars() is not used outside of drivers/pci/intel-iommu.c Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 952df39c989d..cf92c4924b8c 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -141,7 +141,6 @@ struct device_domain_info { struct dmar_domain *domain; /* pointer to domain */ }; -extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -- cgit v1.2.3 From f27be03b271851fd54529f292c0f25b4c1f1a553 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:43 +0000 Subject: intel-iommu: move DMA_32/64BIT_PFN into 
intel-iommu.c Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index cf92c4924b8c..2e5a5c0b6acd 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,11 +9,6 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) - - /* * 0: Present * 1-11: Reserved -- cgit v1.2.3 From 46b08e1a76b758193b0e7b889c6486a16eb1e9e2 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:44 +0000 Subject: intel-iommu: move root entry defs from dma_remapping.h We keep the struct root_entry forward declaration for the pointer in struct intel_iommu. Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 2e5a5c0b6acd..d8521662a495 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,39 +9,7 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned 
long value) -{ - root->val |= value & VTD_PAGE_MASK; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & VTD_PAGE_MASK) : - NULL); -} +struct root_entry; /* * low 64 bits: -- cgit v1.2.3 From 7a8fc25e0cc6e75fa6fdb0a856490e324218550b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:45 +0000 Subject: intel-iommu: move context entry defs out from dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index d8521662a495..9a88f7d0262f 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,44 +11,6 @@ struct root_entry; -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) -#define context_set_address_width(c, 
val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - /* * 0: readable * 1: writable -- cgit v1.2.3 From 622ba12a4c2148999bda9b891bfd0c6ddcb6c57e Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:46 +0000 Subject: intel-iommu: move DMA PTE defs out of dma_remapping.h DMA_PTE_READ/WRITE are needed by kvm. Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 9a88f7d0262f..9d5874e3bec9 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,31 +11,9 @@ struct root_entry; -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - struct intel_iommu; struct dmar_domain { -- cgit v1.2.3 From 99126f7ce14aff5f9371b2fa81fddb82be815794 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:47 +0000 Subject: intel-iommu: move struct dmar_domain def out dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 22 ++-------------------- 1 file 
changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 9d5874e3bec9..333014468f17 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,30 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -struct root_entry; - #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; +struct dmar_domain; +struct root_entry; /* PCI domain-device relationship */ struct device_domain_info { -- cgit v1.2.3 From a647dacbb1389aa6a5fa631766c1eaea35905890 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:48 +0000 Subject: intel-iommu: move struct device_domain_info out of dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 333014468f17..4ef5f6bc0d68 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -16,16 +16,6 @@ struct intel_iommu; struct dmar_domain; struct root_entry; -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn 
number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -- cgit v1.2.3 From 58fa7304a2c2bfd46e505c293ef779aa1d9715c2 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:49 +0000 Subject: intel-iommu: kill off duplicate def of dmar_disabled This is only used in dmar.c and intel-iommu.h, so dma_remapping.h seems like the appropriate place for it. Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dmar.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f1984fc3e06d..f28440784cf0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -144,7 +144,6 @@ struct dmar_rmrr_unit { list_for_each_entry(rmrr, &dmar_rmrr_units, list) /* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -extern int dmar_disabled; #else static inline int intel_iommu_init(void) { -- cgit v1.2.3 From 2abd7e167c1b281f99bb58d302225872bfae9123 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:50 +0000 Subject: intel-iommu: move iommu_prepare_gfx_mapping() out of dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 4ef5f6bc0d68..7799a85614c1 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -20,11 +20,4 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ - #endif -- cgit v1.2.3 From 1b5736839ae13dadc5947940144f95dd0f4a4a8c Mon Sep 17 00:00:00 2001 From: Weidong 
Han Date: Mon, 8 Dec 2008 15:34:06 +0800 Subject: calculate agaw for each iommu "SAGAW" capability may be different across iommus. Use a default agaw, but if default agaw is not supported in some iommus, choose a less supported agaw. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- include/linux/dma_remapping.h | 1 + include/linux/intel-iommu.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 7799a85614c1..136f170cecc2 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -17,6 +17,7 @@ struct dmar_domain; struct root_entry; extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int dmar_disabled; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1bff7bf1bc2c..06349fd5871b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -290,6 +290,7 @@ struct intel_iommu { u32 gcmd; /* Holds TE, EAFL. 
Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ + int agaw; /* agaw of this iommu */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ -- cgit v1.2.3 From faa3d6f5ffe7bf60ebfd0d36513fbcda0eb0ea1a Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:09:29 +0800 Subject: Change intel iommu APIs of virtual machine domain These APIs are used by KVM to use VT-d Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 06349fd5871b..07973c4e4acc 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,15 +330,17 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -void intel_iommu_domain_exit(struct dmar_domain *domain); -struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); -int intel_iommu_context_mapping(struct dmar_domain *domain, - struct pci_dev *pdev); -int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); -struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); +struct dmar_domain *intel_iommu_alloc_domain(void); +void intel_iommu_free_domain(struct dmar_domain *domain); +int intel_iommu_attach_device(struct dmar_domain *domain, + struct pci_dev *pdev); +void intel_iommu_detach_device(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_unmap_address(struct dmar_domain 
*domain, + dma_addr_t iova, size_t size); +u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); #ifdef CONFIG_DMAR int intel_iommu_found(void); -- cgit v1.2.3 From 260782bcfdaaa7850f29d6bb2ec6603019168c57 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Tue, 2 Dec 2008 21:03:39 +0800 Subject: KVM: use the new intel iommu APIs intel iommu APIs are updated, use the new APIs. In addition, change kvm_iommu_map_guest() to just create the domain, let kvm_iommu_assign_device() assign device. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eafabd5c66b2..c96739b4b7a3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,9 +330,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); +int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_DMAR */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, @@ -341,9 +342,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm, return 0; } -static inline int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) +static inline int kvm_iommu_map_guest(struct kvm *kvm) { return -ENODEV; } @@ -352,6 +351,12 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm) { return 0; } + +static inline int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} #endif /* CONFIG_DMAR */ static inline void kvm_guest_enter(void) -- cgit v1.2.3 From 
0a920356748df4fb06e86c21c23d2ed6d31d37ad Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Tue, 2 Dec 2008 21:24:23 +0800 Subject: KVM: support device deassignment Support device deassignment, it can be used in device hotplug. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c96739b4b7a3..ce5d1c17ce26 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -334,6 +334,8 @@ int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_DMAR */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, @@ -357,6 +359,12 @@ static inline int kvm_assign_device(struct kvm *kvm, { return 0; } + +static inline int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} #endif /* CONFIG_DMAR */ static inline void kvm_guest_enter(void) -- cgit v1.2.3 From b653574a7d14b663cc812cb20be6a114939ba186 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:29:53 +0800 Subject: Deassign device in kvm_free_assgined_device In kvm_iommu_unmap_memslots(), assigned_dev_head is already empty. 
Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce5d1c17ce26..e62a4629e51c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -316,6 +316,7 @@ struct kvm_assigned_dev_kernel { #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) unsigned long irq_requested_type; int irq_source_id; + int flags; struct pci_dev *dev; struct kvm *kvm; }; -- cgit v1.2.3 From 4a77a6cf6d9bf9f5c74b27f62bd2bfe6dcc88392 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Nov 2008 17:02:33 +0100 Subject: introcude linux/iommu.h for an iommu api This patch introduces the API to abstract the exported VT-d functions for KVM into a generic API. This way the AMD IOMMU implementation can plug into this API later. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 include/linux/iommu.h (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h new file mode 100644 index 000000000000..8a7bfb1b6ca0 --- /dev/null +++ b/include/linux/iommu.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_IOMMU_H +#define __LINUX_IOMMU_H + +#define IOMMU_READ (1) +#define IOMMU_WRITE (2) + +struct device; + +struct iommu_domain { + void *priv; +}; + +struct iommu_ops { + int (*domain_init)(struct iommu_domain *domain); + void (*domain_destroy)(struct iommu_domain *domain); + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + void (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + unsigned long iova); +}; + +#ifdef CONFIG_IOMMU_API + +extern void register_iommu(struct iommu_ops *ops); +extern bool iommu_found(void); +extern struct iommu_domain *iommu_domain_alloc(void); +extern void iommu_domain_free(struct iommu_domain *domain); +extern int iommu_attach_device(struct iommu_domain *domain, + struct device *dev); +extern void iommu_detach_device(struct iommu_domain *domain, + struct device *dev); +extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); +extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, + size_t size); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova); + +#else /* CONFIG_IOMMU_API */ + +static inline void register_iommu(struct iommu_ops *ops) +{ +} + +static inline bool iommu_found(void) +{ + return false; +} + +static inline struct iommu_domain *iommu_domain_alloc(void) +{ + return NULL; +} + +static inline void iommu_domain_free(struct iommu_domain *domain) +{ +} + +static inline int iommu_attach_device(struct 
iommu_domain *domain, + struct device *dev) +{ + return -ENODEV; +} + +static inline void iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ +} + +static inline int iommu_map_range(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + return -ENODEV; +} + +static inline void iommu_unmap_range(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ +} + +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova) +{ + return 0; +} + +#endif /* CONFIG_IOMMU_API */ + +#endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3 From 19de40a8472fa64693eab844911eec277d489f6c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:43:34 +0100 Subject: KVM: change KVM to use IOMMU API Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e62a4629e51c..ec49d0be7f52 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -328,7 +328,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -#ifdef CONFIG_DMAR +#ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); int kvm_iommu_map_guest(struct kvm *kvm); @@ -337,7 +337,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); -#else /* CONFIG_DMAR */ +#else /* CONFIG_IOMMU_API */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) @@ -366,7 +366,7 @@ static inline int kvm_deassign_device(struct kvm *kvm, { return 0; } -#endif /* CONFIG_DMAR */ +#endif /* CONFIG_IOMMU_API */ static inline void 
kvm_guest_enter(void) { -- cgit v1.2.3 From 5d450806eb0e569c5846a5825e7f535980b0da32 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:52:32 +0100 Subject: VT-d: adapt domain init and destroy functions for IOMMU API Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 07973c4e4acc..0a7ba0cefc74 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -struct dmar_domain *intel_iommu_alloc_domain(void); -void intel_iommu_free_domain(struct dmar_domain *domain); int intel_iommu_attach_device(struct dmar_domain *domain, struct pci_dev *pdev); void intel_iommu_detach_device(struct dmar_domain *domain, -- cgit v1.2.3 From 4c5478c94eb29e6101f1f13175f7455bc8b5d953 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:58:24 +0100 Subject: VT-d: adapt device attach and detach functions for IOMMU API Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0a7ba0cefc74..9909c5a1b20f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -int intel_iommu_attach_device(struct dmar_domain *domain, - struct pci_dev *pdev); -void intel_iommu_detach_device(struct dmar_domain *domain, - struct pci_dev *pdev); int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot); void intel_iommu_unmap_address(struct dmar_domain *domain, -- cgit 
v1.2.3 From dde57a210dcdce85e2813bab8f88687761d9f6a6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:04:09 +0100 Subject: VT-d: adapt domain map and unmap functions for IOMMU API Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 9909c5a1b20f..6bc26e03858c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_unmap_address(struct dmar_domain *domain, - dma_addr_t iova, size_t size); u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); #ifdef CONFIG_DMAR -- cgit v1.2.3 From d14d65777c2491dd5baf1e17f444b8f653f3cbb1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:06:57 +0100 Subject: VT-d: adapt domain iova_to_phys function for IOMMU API Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6bc26e03858c..26ccc0294567 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); - #ifdef CONFIG_DMAR int intel_iommu_found(void); #else /* CONFIG_DMAR */ -- cgit v1.2.3 From e4754c96cf8b82a754dc5ba791d6c0bf1fbe8e8e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:26:42 +0100 Subject: VT-d: remove now unused intel_iommu_found function 
Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 26ccc0294567..c4f6c101dbcd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,15 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -#ifdef CONFIG_DMAR -int intel_iommu_found(void); -#else /* CONFIG_DMAR */ -static inline int intel_iommu_found(void) -{ - return 0; -} -#endif /* CONFIG_DMAR */ - extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); -- cgit v1.2.3 From 87d8fe1ee6b8d2f95076142d58c440dba4e7bdc2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 09:47:09 -0500 Subject: add releasepage hooks to block devices which can be used by file systems Implement blkdev_releasepage() to release the buffer_heads and pages after we release private data belonging to a mounted filesystem. 
Cc: Toshiyuki Okajima Cc: linux-fsdevel@vger.kernel.org Signed-off-by: "Theodore Ts'o" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f2a3010140e3..0f54ae0f0ccd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -565,6 +565,7 @@ struct address_space { struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ + struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ struct semaphore bd_mount_sem; @@ -1385,6 +1386,7 @@ struct super_operations { ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); }; /* -- cgit v1.2.3 From 2f983570010a0dcb26d988da02d7ccfad00c807c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Jan 2009 00:06:34 -0800 Subject: sparseirq: move set/get_timer_rand_state back to .c those two functions only used in that C file Signed-off-by: Yinghai Lu Signed-off-by: Linus Torvalds --- include/linux/random.h | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index adbf3bd3c6b3..407ea3646f8f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -45,56 +45,6 @@ struct rand_pool_info { extern void rand_initialize_irq(int irq); -struct timer_rand_state; -#ifndef CONFIG_SPARSE_IRQ - -extern struct timer_rand_state *irq_timer_state[]; - -static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - if (irq >= nr_irqs) - return NULL; - - return irq_timer_state[irq]; -} - -static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - if (irq >= nr_irqs) - return; - - 
irq_timer_state[irq] = state; -} - -#else - -#include -static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (!desc) - return NULL; - - return desc->timer_rand_state; -} - -static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (!desc) - return; - - desc->timer_rand_state = state; -} -#endif - - extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); extern void add_interrupt_randomness(int irq); -- cgit v1.2.3 From 9188e79ec3fd43a0a605274324aecfb731baa88b Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 12 Nov 2008 16:14:08 +0100 Subject: HID: add phys and name ioctls to hidraw The hiddev interface provides ioctl() calls which can be used to obtain phys and raw name of the underlying device. Add the corresponding support also into hidraw. Signed-off-by: Jiri Kosina --- include/linux/hidraw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h index dbb5c8c374f0..dd8d69269176 100644 --- a/include/linux/hidraw.h +++ b/include/linux/hidraw.h @@ -33,6 +33,8 @@ struct hidraw_devinfo { #define HIDIOCGRDESCSIZE _IOR('H', 0x01, int) #define HIDIOCGRDESC _IOR('H', 0x02, struct hidraw_report_descriptor) #define HIDIOCGRAWINFO _IOR('H', 0x03, struct hidraw_devinfo) +#define HIDIOCGRAWNAME(len) _IOC(_IOC_READ, 'H', 0x04, len) +#define HIDIOCGRAWPHYS(len) _IOC(_IOC_READ, 'H', 0x05, len) #define HIDRAW_FIRST_MINOR 0 #define HIDRAW_MAX_DEVICES 64 -- cgit v1.2.3 From 0ed94b334265b6ee3e3336b4fedacfa9cb2ccaba Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Nov 2008 16:20:07 +0100 Subject: HID: move usbhid flags to usbhid.h Move usbhid specific flags from global hid.h into local usbhid.h. 
Signed-off-by: Jiri Slaby Signed-off-by: Jiri Kosina --- include/linux/hid.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index e5780f8c934a..2c20f20283b2 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -403,15 +403,6 @@ struct hid_output_fifo { #define HID_STAT_ADDED 1 #define HID_STAT_PARSED 2 -#define HID_CTRL_RUNNING 1 -#define HID_OUT_RUNNING 2 -#define HID_IN_RUNNING 3 -#define HID_RESET_PENDING 4 -#define HID_SUSPENDED 5 -#define HID_CLEAR_HALT 6 -#define HID_DISCONNECTED 7 -#define HID_STARTED 8 - struct hid_input { struct list_head list; struct hid_report *report; -- cgit v1.2.3 From 3a6f82f7a22cf19687f556997c6978b31c109360 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Nov 2008 16:20:09 +0100 Subject: HID: add dynids facility Allow adding new devices to the hid drivers on the fly without a need of kernel recompilation. Now, one can test a driver e.g. by: echo 0003:045E:00F0.0003 > ../generic-usb/unbind echo 0003 045E 00F0 > new_id from some driver subdir. Signed-off-by: Jiri Slaby Signed-off-by: Jiri Kosina --- include/linux/hid.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2c20f20283b2..215035bbb288 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -531,6 +531,8 @@ struct hid_usage_id { * @name: driver name (e.g. 
"Footech_bar-wheel") * @id_table: which devices is this driver for (must be non-NULL for probe * to be called) + * @dyn_list: list of dynamically added device ids + * @dyn_lock: lock protecting @dyn_list * @probe: new device inserted * @remove: device removed (NULL if not a hot-plug capable driver) * @report_table: on which reports to call raw_event (NULL means all) @@ -558,6 +560,9 @@ struct hid_driver { char *name; const struct hid_device_id *id_table; + struct list_head dyn_list; + spinlock_t dyn_lock; + int (*probe)(struct hid_device *dev, const struct hid_device_id *id); void (*remove)(struct hid_device *dev); -- cgit v1.2.3 From 725cf0f47dbb02e0482f081828cff73f55479b79 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Tue, 16 Dec 2008 14:20:23 +0100 Subject: HID: avoid sparse warning in HID_COMPAT_LOAD_DRIVER Impact: include a prototype for the exported function in the macro Fix about 20 of this warnings: drivers/hid/hid-a4tech.c:162:1: warning: symbol 'hid_compat_a4tech' was not declared. Should it be static? Signed-off-by: Hannes Eder Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 215035bbb288..81aa84d60c6b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -793,6 +793,8 @@ dbg_hid(const char *fmt, ...) #ifdef CONFIG_HID_COMPAT #define HID_COMPAT_LOAD_DRIVER(name) \ +/* prototype to avoid sparse warning */ \ +extern void hid_compat_##name(void); \ void hid_compat_##name(void) { } \ EXPORT_SYMBOL(hid_compat_##name) #else -- cgit v1.2.3 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete in quite some time, since the supported method for adding a journal inode is to use tune2fs (or to creating new filesystem with a journal via mke2fs or mkfs.ext4). 
Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9d82084a1605..adef1c9940d3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1104,7 +1104,6 @@ extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); -- cgit v1.2.3 From c66b9906f863696159e05890bb7123269bb9a9de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 4 Jan 2009 10:55:02 +0100 Subject: intel-iommu: fix build error with INTR_REMAP=y and DMAR=n dmar.o can be built in the CONFIG_INTR_REMAP=y case but iommu_calculate_agaw() is only available if VT-d is built as well. So create an inline version of iommu_calculate_agaw() for the !CONFIG_DMAR case. The iommu->agaw value won't be used in this case, but the code is cleaner (has fewer #ifdefs) if we have it around unconditionally. 
Signed-off-by: Ingo Molnar --- include/linux/dma_remapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 136f170cecc2..af1dab41674b 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -17,7 +17,15 @@ struct dmar_domain; struct root_entry; extern void free_dmar_iommu(struct intel_iommu *iommu); + +#ifdef CONFIG_DMAR extern int iommu_calculate_agaw(struct intel_iommu *iommu); +#else +static inline int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return 0; +} +#endif extern int dmar_disabled; -- cgit v1.2.3 From 0c8a601678960fbcc1c1185a283d6d107575810b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 8 Nov 2008 01:10:16 +0100 Subject: mfd: Add WM8350 revision H support No other software changes are required. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 6ebf97f2a475..9490ec175d5a 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -536,6 +536,7 @@ #define WM8350_REV_E 0x4 #define WM8350_REV_F 0x5 #define WM8350_REV_G 0x6 +#define WM8350_REV_H 0x7 #define WM8350_NUM_IRQ 63 -- cgit v1.2.3 From 67488526349d043372d141c054f4dc6313780b3c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 8 Nov 2008 01:10:21 +0100 Subject: mfd: Add AUXADC support for WM8350 The auxiliary ADC in the WM8350 is shared between several subdevices so access to it needs to be arbitrated by the core driver. 
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/comparator.h | 8 ++++++++ include/linux/mfd/wm8350/core.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/comparator.h b/include/linux/mfd/wm8350/comparator.h index 053788649452..54bc5d0fd502 100644 --- a/include/linux/mfd/wm8350/comparator.h +++ b/include/linux/mfd/wm8350/comparator.h @@ -164,4 +164,12 @@ #define WM8350_AUXADC_BATT 6 #define WM8350_AUXADC_TEMP 7 +struct wm8350; + +/* + * AUX ADC Readback + */ +int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, + int vref); + #endif diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 9490ec175d5a..cc190055b9c4 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -573,6 +573,8 @@ struct wm8350 { void *src); u16 *reg_cache; + struct mutex auxadc_mutex; + /* Interrupt handling */ struct work_struct irq_work; struct mutex irq_mutex; /* IRQ table mutex */ -- cgit v1.2.3 From 3fba19ec1ae5b460c73a7f32efed8d3b3300b246 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sat, 8 Nov 2008 01:13:16 +0100 Subject: mfd: allow reading entire register banks on twl4030 Minor change to the TWL4030 utility interface: support reads of all 256 bytes in each register bank (vs just 255). This can help when debugging, but is otherwise a NOP. Signed-off-by: David Brownell Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl4030.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index fb604dcd38f1..ae25c907b7cf 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -78,8 +78,8 @@ int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg); * IMPORTANT: For twl4030_i2c_write(), allocate num_bytes + 1 * for the value, and populate your data starting at offset 1. 
*/ -int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes); -int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes); +int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); +int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); /*----------------------------------------------------------------------*/ -- cgit v1.2.3 From 14431aa0c5a443d13d24e6f865a8838f97dab973 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 16 Nov 2008 20:16:47 +0100 Subject: power_supply: Add support for WM8350 PMU This patch adds support for the PMU provided by the WM8350 which implements battery, line and USB supplies including a battery charger. The hardware functions largely autonomously, with minimal software control required to initiate fast charging. Support for configuration of the USB supply is not yet implemented. This means that the hardware will remain in the mode configured at startup, by default limiting the current drawn from USB to 100mA. This driver was originally written by Liam Girdwood with subsequent updates for submission by Mark Brown. 
Signed-off-by: Mark Brown Acked-by: Anton Vorontsov Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 26 ++++++++++++++++++++++++++ include/linux/mfd/wm8350/supply.h | 23 ++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index cc190055b9c4..d2614dfc9397 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -57,6 +57,9 @@ #define WM8350_OVER_CURRENT_INT_STATUS_MASK 0x25 #define WM8350_GPIO_INT_STATUS_MASK 0x26 #define WM8350_COMPARATOR_INT_STATUS_MASK 0x27 +#define WM8350_MISC_OVERRIDES 0xE3 +#define WM8350_COMPARATOR_OVERRIDES 0xE7 +#define WM8350_STATE_MACHINE_STATUS 0xE9 #define WM8350_MAX_REGISTER 0xFF @@ -523,6 +526,29 @@ #define WM8350_DC2_STS 0x0002 #define WM8350_DC1_STS 0x0001 +/* + * R227 (0xE3) - Misc Overrides + */ +#define WM8350_USB_LIMIT_OVRDE 0x0400 + +/* + * R227 (0xE7) - Comparator Overrides + */ +#define WM8350_USB_FB_OVRDE 0x8000 +#define WM8350_WALL_FB_OVRDE 0x4000 +#define WM8350_BATT_FB_OVRDE 0x2000 + + +/* + * R233 (0xE9) - State Machinine Status + */ +#define WM8350_USB_SM_MASK 0x0700 +#define WM8350_USB_SM_SHIFT 8 + +#define WM8350_USB_SM_100_SLV 1 +#define WM8350_USB_SM_500_SLV 5 +#define WM8350_USB_SM_STDBY_SLV 7 + /* WM8350 wake up conditions */ #define WM8350_IRQ_WKUP_OFF_STATE 43 #define WM8350_IRQ_WKUP_HIB_STATE 44 diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h index 1c8f3cde79b0..79721513fa9f 100644 --- a/include/linux/mfd/wm8350/supply.h +++ b/include/linux/mfd/wm8350/supply.h @@ -13,7 +13,8 @@ #ifndef __LINUX_MFD_WM8350_SUPPLY_H_ #define __LINUX_MFD_WM8350_SUPPLY_H_ -#include +#include +#include /* * Charger registers @@ -104,8 +105,28 @@ #define WM8350_IRQ_EXT_WALL_FB 37 #define WM8350_IRQ_EXT_BAT_FB 38 +/* + * Policy to control charger state machine. 
+ */ +struct wm8350_charger_policy { + + /* charger state machine policy - set in machine driver */ + int eoc_mA; /* end of charge current (mA) */ + int charge_mV; /* charge voltage */ + int fast_limit_mA; /* fast charge current limit */ + int fast_limit_USB_mA; /* USB fast charge current limit */ + int charge_timeout; /* charge timeout (mins) */ + int trickle_start_mV; /* trickle charge starts at mV */ + int trickle_charge_mA; /* trickle charge current */ + int trickle_charge_USB_mA; /* USB trickle charge current */ +}; + struct wm8350_power { struct platform_device *pdev; + struct power_supply battery; + struct power_supply usb; + struct power_supply ac; + struct wm8350_charger_policy *policy; }; #endif -- cgit v1.2.3 From d756f4a4446227ca9626087939a6769ca55ab036 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Nov 2008 20:20:30 +0100 Subject: mfd: Switch WM8350 revision detection to a feature based model Rather than check for chip revisions in the WM8350 drivers have the core code set flags for relevant differences. 
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 2 -- include/linux/mfd/wm8350/supply.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index d2614dfc9397..3c9735663f36 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -585,8 +585,6 @@ struct wm8350_irq { }; struct wm8350 { - int rev; /* chip revision */ - struct device *dev; /* device IO */ diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h index 79721513fa9f..2b9479310bbd 100644 --- a/include/linux/mfd/wm8350/supply.h +++ b/include/linux/mfd/wm8350/supply.h @@ -127,6 +127,8 @@ struct wm8350_power { struct power_supply usb; struct power_supply ac; struct wm8350_charger_policy *policy; + + int rev_g_coeff; }; #endif -- cgit v1.2.3 From b797a5551979da22b0a35632198ffc8a330d9537 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Nov 2008 20:22:58 +0100 Subject: mfd: Refactor WM8350 chip identification Since the WM8350 driver was originally written the semantics for the identification registers of the chip have been clarified, allowing us to do an exact match on all the fields. This avoids mistakenly running on unsupported hardware. Also change to using the datasheet names more consistently for legibility and fix a printk() that should be dev_err(). 
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 3c9735663f36..2a7abeebe777 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -29,6 +29,7 @@ */ #define WM8350_RESET_ID 0x00 #define WM8350_ID 0x01 +#define WM8350_REVISION 0x02 #define WM8350_SYSTEM_CONTROL_1 0x03 #define WM8350_SYSTEM_CONTROL_2 0x04 #define WM8350_SYSTEM_HIBERNATE 0x05 @@ -79,6 +80,11 @@ #define WM8350_CONF_STS_MASK 0x0C00 #define WM8350_CUST_ID_MASK 0x00FF +/* + * R2 (0x02) - Revision + */ +#define WM8350_MASK_REV_MASK 0x00FF + /* * R3 (0x03) - System Control 1 */ -- cgit v1.2.3 From 7e386e6e0e4f34f0545e8923e22fe4dd61ef9d48 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 30 Nov 2008 22:43:21 +0100 Subject: power_supply: Add cold to the POWER_SUPPLY_HEALTH report values Some systems are able to report problems with batteries being under temperature. Signed-off-by: Mark Brown Acked-by: Anton Vorontsov Signed-off-by: Samuel Ortiz --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f9348cba6dc1..8ff25e0e7f7a 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -45,6 +45,7 @@ enum { POWER_SUPPLY_HEALTH_DEAD, POWER_SUPPLY_HEALTH_OVERVOLTAGE, POWER_SUPPLY_HEALTH_UNSPEC_FAILURE, + POWER_SUPPLY_HEALTH_COLD, }; enum { -- cgit v1.2.3 From 4008e879e1325c29362aa2c3fa4b527273ae15a8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 30 Nov 2008 22:45:14 +0100 Subject: power_supply: Add battery health reporting for WM8350 Implement support for reporting battery health in the WM8350 battery interface. Since we are now able to report this via the class, remove the diagnostics from the interrupt handler. 
Signed-off-by: Mark Brown Acked-by: Anton Vorontsov Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 2a7abeebe777..afeff6f1316c 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -58,6 +58,7 @@ #define WM8350_OVER_CURRENT_INT_STATUS_MASK 0x25 #define WM8350_GPIO_INT_STATUS_MASK 0x26 #define WM8350_COMPARATOR_INT_STATUS_MASK 0x27 +#define WM8350_CHARGER_OVERRIDES 0xE2 #define WM8350_MISC_OVERRIDES 0xE3 #define WM8350_COMPARATOR_OVERRIDES 0xE7 #define WM8350_STATE_MACHINE_STATUS 0xE9 @@ -532,6 +533,12 @@ #define WM8350_DC2_STS 0x0002 #define WM8350_DC1_STS 0x0001 +/* + * R226 (0xE2) - Charger status + */ +#define WM8350_CHG_BATT_HOT_OVRDE 0x8000 +#define WM8350_CHG_BATT_COLD_OVRDE 0x4000 + /* * R227 (0xE3) - Misc Overrides */ -- cgit v1.2.3 From 67460a7c26271fd7a32e5d51b2c806a84ce78a62 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 1 Dec 2008 00:35:33 +0100 Subject: mfd: twl4030: cleanup symbols and OMAP dependency Finish removing dependency of TWL driver stack on platform-specific IRQ definitions ... and remove the build dependency on OMAP. This lets the TWL4030 code be included in test builds for most platforms, and will make it easier for non-OMAP folk to update most of this code for new APIs etc. Signed-off-by: David Brownell Signed-off-by: Tony Lindgren Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl4030.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index ae25c907b7cf..d4846695bcd0 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -285,33 +285,6 @@ struct twl4030_platform_data { int twl4030_sih_setup(int module); -/* - * FIXME completely stop using TWL4030_IRQ_BASE ... 
instead, pass the - * IRQ data to subsidiary devices using platform device resources. - */ - -/* IRQ information-need base */ -#include -/* TWL4030 interrupts */ - -/* #define TWL4030_MODIRQ_GPIO (TWL4030_IRQ_BASE + 0) */ -#define TWL4030_MODIRQ_KEYPAD (TWL4030_IRQ_BASE + 1) -#define TWL4030_MODIRQ_BCI (TWL4030_IRQ_BASE + 2) -#define TWL4030_MODIRQ_MADC (TWL4030_IRQ_BASE + 3) -/* #define TWL4030_MODIRQ_USB (TWL4030_IRQ_BASE + 4) */ -/* #define TWL4030_MODIRQ_PWR (TWL4030_IRQ_BASE + 5) */ - -#define TWL4030_PWRIRQ_PWRBTN (TWL4030_PWR_IRQ_BASE + 0) -/* #define TWL4030_PWRIRQ_CHG_PRES (TWL4030_PWR_IRQ_BASE + 1) */ -/* #define TWL4030_PWRIRQ_USB_PRES (TWL4030_PWR_IRQ_BASE + 2) */ -/* #define TWL4030_PWRIRQ_RTC (TWL4030_PWR_IRQ_BASE + 3) */ -/* #define TWL4030_PWRIRQ_HOT_DIE (TWL4030_PWR_IRQ_BASE + 4) */ -/* #define TWL4030_PWRIRQ_PWROK_TIMEOUT (TWL4030_PWR_IRQ_BASE + 5) */ -/* #define TWL4030_PWRIRQ_MBCHG (TWL4030_PWR_IRQ_BASE + 6) */ -/* #define TWL4030_PWRIRQ_SC_DETECT (TWL4030_PWR_IRQ_BASE + 7) */ - -/* Rest are unsued currently*/ - /* Offsets to Power Registers */ #define TWL4030_VDAC_DEV_GRP 0x3B #define TWL4030_VDAC_DEDICATED 0x3E @@ -322,10 +295,6 @@ int twl4030_sih_setup(int module); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -/* TWL4030 GPIO interrupt definitions */ - -#define TWL4030_GPIO_IRQ_NO(n) (TWL4030_GPIO_IRQ_BASE + (n)) - /* * Exported TWL4030 GPIO APIs * -- cgit v1.2.3 From dad759ff8ba79927766e3f0159bfc5fb6de0f982 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 1 Dec 2008 00:43:58 +0100 Subject: mfd: twl4030: create some regulator devices Initial code to create twl4030 voltage regulator devices, using the new regulator framework. Note that this now starts to care what name is used to declare the TWL chip: - TWL4030 is the "old" chip; newer ones have a bigger variety of VAUX2 voltages. - TWL5030 is the core "new" chip; TPS65950 is its catalog version. 
- The TPS65930 and TPS65920 are cost-reduced catalog versions of TWL5030 parts ... fewer regulators, no battery charger, etc. Board-specific regulator configuration should be provided, listing which regulators are used and their constraints (e.g. 1.8V only). Code that could ("should"?) leverage the regulator stuff includes TWL4030 USB transceiver support and MMC glue, LCD support for the 3430SDP and Labrador boards, and S-Video output. Signed-off-by: David Brownell Signed-off-by: Tony Lindgren Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl4030.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index d4846695bcd0..e06555d40d35 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -278,6 +278,18 @@ struct twl4030_platform_data { struct twl4030_keypad_data *keypad; struct twl4030_usb_data *usb; + /* LDO regulators */ + struct regulator_init_data *vdac; + struct regulator_init_data *vpll1; + struct regulator_init_data *vpll2; + struct regulator_init_data *vmmc1; + struct regulator_init_data *vmmc2; + struct regulator_init_data *vsim; + struct regulator_init_data *vaux1; + struct regulator_init_data *vaux2; + struct regulator_init_data *vaux3; + struct regulator_init_data *vaux4; + /* REVISIT more to come ... _nothing_ should be hard-wired */ }; @@ -309,4 +321,39 @@ int twl4030_set_gpio_debounce(int gpio, int enable); static inline int twl4030charger_usb_en(int enable) { return 0; } #endif +/*----------------------------------------------------------------------*/ + +/* Linux-specific regulator identifiers ... for now, we only support + * the LDOs, and leave the three buck converters alone. VDD1 and VDD2 + * need to tie into hardware based voltage scaling (cpufreq etc), while + * VIO is generally fixed. 
+ */ + +/* EXTERNAL dc-to-dc buck converters */ +#define TWL4030_REG_VDD1 0 +#define TWL4030_REG_VDD2 1 +#define TWL4030_REG_VIO 2 + +/* EXTERNAL LDOs */ +#define TWL4030_REG_VDAC 3 +#define TWL4030_REG_VPLL1 4 +#define TWL4030_REG_VPLL2 5 /* not on all chips */ +#define TWL4030_REG_VMMC1 6 +#define TWL4030_REG_VMMC2 7 /* not on all chips */ +#define TWL4030_REG_VSIM 8 /* not on all chips */ +#define TWL4030_REG_VAUX1 9 /* not on all chips */ +#define TWL4030_REG_VAUX2_4030 10 /* (twl4030-specific) */ +#define TWL4030_REG_VAUX2 11 /* (twl5030 and newer) */ +#define TWL4030_REG_VAUX3 12 /* not on all chips */ +#define TWL4030_REG_VAUX4 13 /* not on all chips */ + +/* INTERNAL LDOs */ +#define TWL4030_REG_VINTANA1 14 +#define TWL4030_REG_VINTANA2 15 +#define TWL4030_REG_VINTDIG 16 +#define TWL4030_REG_VUSB1V5 17 +#define TWL4030_REG_VUSB1V8 18 +#define TWL4030_REG_VUSB3V1 19 +#define TWL4030_REG_VUSBCP 20 + #endif /* End of __TWL4030_H */ -- cgit v1.2.3 From b73eac7871d002835be17d4602cced2c15c0db4b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 7 Dec 2008 19:10:58 +0100 Subject: mfd: twl4030 regulator bug fixes This contains two bugfixes to the initial twl4030 regulator support patch related to USB: (a) always overwrite the old list of consumers ... else the regulator handles all use the same "usb1v5" name; (b) don't set up the "usbcp" regulator, which turns out to be managed through separate controls, usually ULPI directly from the OTG controller. 
Signed-off-by: David Brownell Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl4030.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index e06555d40d35..a8f84c01f82e 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -354,6 +354,5 @@ int twl4030_set_gpio_debounce(int gpio, int enable); #define TWL4030_REG_VUSB1V5 17 #define TWL4030_REG_VUSB1V8 18 #define TWL4030_REG_VUSB3V1 19 -#define TWL4030_REG_VUSBCP 20 #endif /* End of __TWL4030_H */ -- cgit v1.2.3 From 856f6fd119411d5701d5db96e1aae1dd69923887 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 18 Dec 2008 10:54:27 +0100 Subject: mfd: Dialog DA9030 battery charger MFD driver This patch amends DA903x MFD driver with definitions and methods needed for battery charger driver. Signed-off-by: Mike Rapoport Signed-off-by: Samuel Ortiz --- include/linux/mfd/da903x.h | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h index cad314c12439..115dbe965082 100644 --- a/include/linux/mfd/da903x.h +++ b/include/linux/mfd/da903x.h @@ -32,6 +32,7 @@ enum { DA9030_ID_LDO18, DA9030_ID_LDO19, DA9030_ID_LDO_INT, /* LDO Internal */ + DA9030_ID_BAT, /* battery charger */ DA9034_ID_LED_1, DA9034_ID_LED_2, @@ -93,6 +94,43 @@ struct da9034_touch_pdata { int y_inverted; }; +/* DA9030 battery charger data */ +struct power_supply_info; + +struct da9030_battery_info { + /* battery parameters */ + struct power_supply_info *battery_info; + + /* current and voltage to use for battery charging */ + unsigned int charge_milliamp; + unsigned int charge_millivolt; + + /* voltage thresholds (in millivolts) */ + int vbat_low; + int vbat_crit; + int vbat_charge_start; + int vbat_charge_stop; + int vbat_charge_restart; + + /* battery nominal minimal and maximal voltages in millivolts */ + 
int vcharge_min; + int vcharge_max; + + /* Temperature thresholds. These are DA9030 register values + "as is" and should be measured for each battery type */ + int tbat_low; + int tbat_high; + int tbat_restart; + + + /* battery monitor interval (seconds) */ + unsigned int batmon_interval; + + /* platform callbacks for battery low and critical events */ + void (*battery_low)(void); + void (*battery_critical)(void); +}; + struct da903x_subdev_info { int id; const char *name; @@ -190,11 +228,13 @@ extern int da903x_unregister_notifier(struct device *dev, extern int da903x_query_status(struct device *dev, unsigned int status); -/* NOTE: the two functions below are not intended for use outside - * of the DA9034 sub-device drivers +/* NOTE: the functions below are not intended for use outside + * of the DA903x sub-device drivers */ extern int da903x_write(struct device *dev, int reg, uint8_t val); +extern int da903x_writes(struct device *dev, int reg, int len, uint8_t *val); extern int da903x_read(struct device *dev, int reg, uint8_t *val); +extern int da903x_reads(struct device *dev, int reg, int len, uint8_t *val); extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask); extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask); extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask); -- cgit v1.2.3 From 96920630624868add3f63f596523e70dbb64549a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Dec 2008 23:09:50 +0100 Subject: mfd: Add WM8352 support The WM8352 is a variant of the WM8350. Aside from the register defaults there are no software visible differences to the WM8350. 
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index afeff6f1316c..737579086d01 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[]; extern const u16 wm8350_mode1_defaults[]; extern const u16 wm8350_mode2_defaults[]; extern const u16 wm8350_mode3_defaults[]; +extern const u16 wm8352_mode0_defaults[]; +extern const u16 wm8352_mode1_defaults[]; +extern const u16 wm8352_mode2_defaults[]; +extern const u16 wm8352_mode3_defaults[]; struct wm8350; -- cgit v1.2.3 From 645524a9c6e1e42dc4fe03217befb20e2fc4d43e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Dec 2008 23:12:16 +0100 Subject: mfd: Support configurable numbers of DCDCs and ISINKs on WM8350 Some WM8350 variants have fewer DCDCs and ISINKs. Identify these at probe and refuse to use the absent DCDCs when running on these chips. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/pmic.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index 69b69e07f62f..96acbfc8aa12 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -701,6 +701,10 @@ struct platform_device; struct regulator_init_data; struct wm8350_pmic { + /* Number of regulators of each type on this device */ + int max_dcdc; + int max_isink; + /* ISINK to DCDC mapping */ int isink_A_dcdc; int isink_B_dcdc; -- cgit v1.2.3 From ca23f8c1b0aa15dc69565244fc5dffa67a72dd02 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Dec 2008 23:12:28 +0100 Subject: mfd: Add WM8351 support The WM8351 is a WM8350 variant. As well as register default changes the WM8351 has fewer voltage and current regulators than the WM8350. 
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 737579086d01..980669d50dca 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[]; extern const u16 wm8350_mode1_defaults[]; extern const u16 wm8350_mode2_defaults[]; extern const u16 wm8350_mode3_defaults[]; +extern const u16 wm8351_mode0_defaults[]; +extern const u16 wm8351_mode1_defaults[]; +extern const u16 wm8351_mode2_defaults[]; +extern const u16 wm8351_mode3_defaults[]; extern const u16 wm8352_mode0_defaults[]; extern const u16 wm8352_mode1_defaults[]; extern const u16 wm8352_mode2_defaults[]; -- cgit v1.2.3 From 0931a4c6dbfab03f2bfd22a9170130f7b155d53a Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 22 Dec 2008 12:05:27 +0100 Subject: mfd: dm355evm msp430 driver Basic MFD framework for the MSP430 microcontroller firmware used on the dm355evm board: - Provides an interface for other drivers: register read/write utilities, and register declarations. - Directly exports: * Many signals through the GPIO framework + LEDs + SW6 through gpio sysfs + NTSC/nPAL jumper through gpio sysfs + ... more could be added later, e.g. MMC signals * Child devices: + LEDs, via leds-gpio child (and default triggers) + RTC, via rtc-dm355evm child device + Buttons and IR control, via dm355evm_keys - Supports power-off system call. Use the reset button to power the board back up; the power supply LED will be on, but the MSP430 waits to re-activate the regulators. 
- On probe() this: * Announces firmware revision * Turns off the banked LEDs * Exports the resources noted above * Hooks the power-off support * Muxes tvp5146 -or- imager for video input Unless the new tvp514x driver (tracked for mainline) is configured, this assumes that some custom imager driver handles video-in. This completely ignores the registers reporting the output voltages on the various power supplies. Someone could add a hwmon interface if that seems useful. Signed-off-by: David Brownell Signed-off-by: Samuel Ortiz --- include/linux/i2c/dm355evm_msp.h | 79 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/linux/i2c/dm355evm_msp.h (limited to 'include/linux') diff --git a/include/linux/i2c/dm355evm_msp.h b/include/linux/i2c/dm355evm_msp.h new file mode 100644 index 000000000000..372470350fab --- /dev/null +++ b/include/linux/i2c/dm355evm_msp.h @@ -0,0 +1,79 @@ +/* + * dm355evm_msp.h - support MSP430 microcontroller on DM355EVM board + */ +#ifndef __LINUX_I2C_DM355EVM_MSP +#define __LINUX_I2C_DM355EVM_MSP + +/* + * Written against Spectrum's writeup for the A4 firmware revision, + * and tweaked to match source and rev D2 schematics by removing CPLD + * and NOR flash hooks (which were last appropriate in rev B boards). + * + * Note that the firmware supports a flavor of write posting ... to be + * sure a write completes, issue another read or write. 
+ */ + +/* utilities to access "registers" emulated by msp430 firmware */ +extern int dm355evm_msp_write(u8 value, u8 reg); +extern int dm355evm_msp_read(u8 reg); + + +/* command/control registers */ +#define DM355EVM_MSP_COMMAND 0x00 +# define MSP_COMMAND_NULL 0 +# define MSP_COMMAND_RESET_COLD 1 +# define MSP_COMMAND_RESET_WARM 2 +# define MSP_COMMAND_RESET_WARM_I 3 +# define MSP_COMMAND_POWEROFF 4 +# define MSP_COMMAND_IR_REINIT 5 +#define DM355EVM_MSP_STATUS 0x01 +# define MSP_STATUS_BAD_OFFSET BIT(0) +# define MSP_STATUS_BAD_COMMAND BIT(1) +# define MSP_STATUS_POWER_ERROR BIT(2) +# define MSP_STATUS_RXBUF_OVERRUN BIT(3) +#define DM355EVM_MSP_RESET 0x02 /* 0 bits == in reset */ +# define MSP_RESET_DC5 BIT(0) +# define MSP_RESET_TVP5154 BIT(2) +# define MSP_RESET_IMAGER BIT(3) +# define MSP_RESET_ETHERNET BIT(4) +# define MSP_RESET_SYS BIT(5) +# define MSP_RESET_AIC33 BIT(7) + +/* GPIO registers ... bit patterns mostly match the source MSP ports */ +#define DM355EVM_MSP_LED 0x03 /* active low (MSP P4) */ +#define DM355EVM_MSP_SWITCH1 0x04 /* (MSP P5, masked) */ +# define MSP_SWITCH1_SW6_1 BIT(0) +# define MSP_SWITCH1_SW6_2 BIT(1) +# define MSP_SWITCH1_SW6_3 BIT(2) +# define MSP_SWITCH1_SW6_4 BIT(3) +# define MSP_SWITCH1_J1 BIT(4) /* NTSC/PAL */ +# define MSP_SWITCH1_MSP_INT BIT(5) /* active low */ +#define DM355EVM_MSP_SWITCH2 0x05 /* (MSP P6, masked) */ +# define MSP_SWITCH2_SW10 BIT(3) +# define MSP_SWITCH2_SW11 BIT(4) +# define MSP_SWITCH2_SW12 BIT(5) +# define MSP_SWITCH2_SW13 BIT(6) +# define MSP_SWITCH2_SW14 BIT(7) +#define DM355EVM_MSP_SDMMC 0x06 /* (MSP P2, masked) */ +# define MSP_SDMMC_0_WP BIT(1) +# define MSP_SDMMC_0_CD BIT(2) /* active low */ +# define MSP_SDMMC_1_WP BIT(3) +# define MSP_SDMMC_1_CD BIT(4) /* active low */ +#define DM355EVM_MSP_FIRMREV 0x07 /* not a GPIO (out of order) */ +#define DM355EVM_MSP_VIDEO_IN 0x08 /* (MSP P3, masked) */ +# define MSP_VIDEO_IMAGER BIT(7) /* low == tvp5146 */ + +/* power supply registers are currently omitted 
*/ + +/* RTC registers */ +#define DM355EVM_MSP_RTC_0 0x12 /* LSB */ +#define DM355EVM_MSP_RTC_1 0x13 +#define DM355EVM_MSP_RTC_2 0x14 +#define DM355EVM_MSP_RTC_3 0x15 /* MSB */ + +/* input event queue registers; code == ((HIGH << 8) | LOW) */ +#define DM355EVM_MSP_INPUT_COUNT 0x16 /* decrement by reading LOW */ +#define DM355EVM_MSP_INPUT_HIGH 0x17 +#define DM355EVM_MSP_INPUT_LOW 0x18 + +#endif /* __LINUX_I2C_DM355EVM_MSP */ -- cgit v1.2.3 From f3298dc4f2277874d40cb4fc3a6e277317d6603b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 03:16:51 -0500 Subject: sanitize audit_socketcall * don't bother with allocations * now that it can't fail, make it return void Signed-off-by: Al Viro --- include/linux/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 26c4f6f65a46..466a953d4bf6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -446,7 +446,7 @@ extern void audit_log_task_context(struct audit_buffer *ab); extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_bprm(struct linux_binprm *bprm); -extern int audit_socketcall(int nargs, unsigned long *args); +extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int __audit_fd_pair(int fd1, int fd2); extern int audit_set_macxattr(const char *name); @@ -549,7 +549,7 @@ extern int audit_signals; #define audit_ipc_obj(i) ({ 0; }) #define audit_ipc_set_perm(q,u,g,m) ({ 0; }) #define audit_bprm(p) ({ 0; }) -#define audit_socketcall(n,a) ({ 0; }) +#define audit_socketcall(n,a) ((void)0) #define audit_fd_pair(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_set_macxattr(n) do { ; } while (0) -- cgit v1.2.3 From a33e6751003c5ade603737d828b1519d980ce392 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 
2008 03:40:06 -0500 Subject: sanitize audit_ipc_obj() * get rid of allocations * make it return void * simplify callers Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 466a953d4bf6..f8578b9088e1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -443,7 +443,7 @@ extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); -extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp); +extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_bprm(struct linux_binprm *bprm); extern void audit_socketcall(int nargs, unsigned long *args); @@ -460,11 +460,10 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *old); extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); -static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) +static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { if (unlikely(!audit_dummy_context())) - return __audit_ipc_obj(ipcp); - return 0; + __audit_ipc_obj(ipcp); } static inline int audit_fd_pair(int fd1, int fd2) { @@ -546,7 +545,7 @@ extern int audit_signals; #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) -#define audit_ipc_obj(i) ({ 0; }) +#define audit_ipc_obj(i) ((void)0) #define audit_ipc_set_perm(q,u,g,m) ({ 0; }) #define audit_bprm(p) ({ 0; }) #define audit_socketcall(n,a) ((void)0) -- cgit v1.2.3 From e816f370cbadd2afea9f1a42f232d0636137d563 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 03:47:15 -0500 Subject: sanitize audit_ipc_set_perm() * 
get rid of allocations * make it return void * simplify callers Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index f8578b9088e1..b7abfe0d6737 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -444,7 +444,7 @@ extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); -extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); +extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_bprm(struct linux_binprm *bprm); extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); @@ -471,11 +471,10 @@ static inline int audit_fd_pair(int fd1, int fd2) return __audit_fd_pair(fd1, fd2); return 0; } -static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) +static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) { if (unlikely(!audit_dummy_context())) - return __audit_ipc_set_perm(qbytes, uid, gid, mode); - return 0; + __audit_ipc_set_perm(qbytes, uid, gid, mode); } static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) { @@ -546,7 +545,7 @@ extern int audit_signals; #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) #define audit_ipc_obj(i) ((void)0) -#define audit_ipc_set_perm(q,u,g,m) ({ 0; }) +#define audit_ipc_set_perm(q,u,g,m) ((void)0) #define audit_bprm(p) ({ 0; }) #define audit_socketcall(n,a) ((void)0) #define audit_fd_pair(n,a) ({ 0; }) -- cgit v1.2.3 From 7392906ea915b9a2c14dea32b3604b4e178f82f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 
Dec 2008 06:58:59 -0500 Subject: sanitize audit_mq_getsetattr() * get rid of allocations * make it return void * don't duplicate parts of audit_dummy_context() Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b7abfe0d6737..b7707e577b80 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -454,7 +454,7 @@ extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); -extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); +extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); @@ -500,11 +500,10 @@ static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_n return __audit_mq_notify(mqdes, u_notification); return 0; } -static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) +static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { if (unlikely(!audit_dummy_context())) - return __audit_mq_getsetattr(mqdes, mqstat); - return 0; + __audit_mq_getsetattr(mqdes, mqstat); } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, @@ -555,7 +554,7 @@ extern int audit_signals; #define audit_mq_timedsend(d,l,p,t) ({ 0; }) #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) -#define audit_mq_getsetattr(d,s) ({ 0; }) +#define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, 
ocr) ({ 0; }) #define audit_log_capset(pid, ncr, ocr) ({ 0; }) #define audit_ptrace(t) ((void)0) -- cgit v1.2.3 From 20114f71b27cafeb7c7e41d2b0f0b68c3fbb022b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 07:16:12 -0500 Subject: sanitize audit_mq_notify() * don't copy_from_user() twice * don't bother with allocations * don't duplicate parts of audit_dummy_context() * make it return void Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b7707e577b80..8101d2c4a995 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -453,7 +453,7 @@ extern int audit_set_macxattr(const char *name); extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr); extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); -extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); +extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, @@ -494,11 +494,10 @@ static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned in return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout); return 0; } -static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) +static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { if (unlikely(!audit_dummy_context())) - return __audit_mq_notify(mqdes, u_notification); - return 0; + __audit_mq_notify(mqdes, notification); } static inline void 
audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { @@ -553,7 +552,7 @@ extern int audit_signals; #define audit_mq_open(o,m,a) ({ 0; }) #define audit_mq_timedsend(d,l,p,t) ({ 0; }) #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) -#define audit_mq_notify(d,n) ({ 0; }) +#define audit_mq_notify(d,n) ((void)0) #define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) #define audit_log_capset(pid, ncr, ocr) ({ 0; }) -- cgit v1.2.3 From c32c8af43b9adde8d6f938d8e6328c13b8de79ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 03:46:48 -0500 Subject: sanitize AUDIT_MQ_SENDRECV * logging the original value of *msg_prio in mq_timedreceive(2) is insane - the argument is write-only (i.e. syscall always ignores the original value and only overwrites it). * merge __audit_mq_timed{send,receive} * don't do copy_from_user() twice * don't mess with allocations in auditsc part * ... and don't bother checking !audit_enabled and !context in there - we'd already checked for audit_dummy_context(). 
Signed-off-by: Al Viro --- include/linux/audit.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8101d2c4a995..67f0cdd991ba 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -451,8 +451,7 @@ extern int audit_sockaddr(int len, void *addr); extern int __audit_fd_pair(int fd1, int fd2); extern int audit_set_macxattr(const char *name); extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr); -extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout); -extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); +extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, @@ -482,17 +481,10 @@ static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u return __audit_mq_open(oflag, mode, u_attr); return 0; } -static inline int audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout) +static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout) { if (unlikely(!audit_dummy_context())) - return __audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout); - return 0; -} -static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout) -{ - if (unlikely(!audit_dummy_context())) - return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout); - return 0; + 
__audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout); } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { @@ -550,8 +542,7 @@ extern int audit_signals; #define audit_sockaddr(len, addr) ({ 0; }) #define audit_set_macxattr(n) do { ; } while (0) #define audit_mq_open(o,m,a) ({ 0; }) -#define audit_mq_timedsend(d,l,p,t) ({ 0; }) -#define audit_mq_timedreceive(d,l,p,t) ({ 0; }) +#define audit_mq_sendrecv(d,l,p,t) ((void)0) #define audit_mq_notify(d,n) ((void)0) #define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) -- cgit v1.2.3 From 564f6993ffef656aebaf46cf2f1f6cb4f5c97207 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 04:02:26 -0500 Subject: sanitize audit_mq_open() * don't bother with allocations * don't do double copy_from_user() * don't duplicate parts of check for audit_dummy_context() Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 67f0cdd991ba..54978bdd2bd4 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -450,7 +450,7 @@ extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int __audit_fd_pair(int fd1, int fd2); extern int audit_set_macxattr(const char *name); -extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr); +extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); @@ -475,11 +475,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid if (unlikely(!audit_dummy_context())) 
__audit_ipc_set_perm(qbytes, uid, gid, mode); } -static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) +static inline void audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) { if (unlikely(!audit_dummy_context())) - return __audit_mq_open(oflag, mode, u_attr); - return 0; + __audit_mq_open(oflag, mode, attr); } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout) { @@ -541,7 +540,7 @@ extern int audit_signals; #define audit_fd_pair(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_set_macxattr(n) do { ; } while (0) -#define audit_mq_open(o,m,a) ({ 0; }) +#define audit_mq_open(o,m,a) ((void)0) #define audit_mq_sendrecv(d,l,p,t) ((void)0) #define audit_mq_notify(d,n) ((void)0) #define audit_mq_getsetattr(d,s) ((void)0) -- cgit v1.2.3 From 157cf649a735a2f7e8dba0ed08e6e38b6c30d886 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 04:57:47 -0500 Subject: sanitize audit_fd_pair() * no allocations * return void Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 54978bdd2bd4..bd59cd1e3219 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -448,7 +448,7 @@ extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mod extern int audit_bprm(struct linux_binprm *bprm); extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); -extern int __audit_fd_pair(int fd1, int fd2); +extern void __audit_fd_pair(int fd1, int fd2); extern int audit_set_macxattr(const char *name); extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout); @@ -464,11 +464,10 @@ static inline 
void audit_ipc_obj(struct kern_ipc_perm *ipcp) if (unlikely(!audit_dummy_context())) __audit_ipc_obj(ipcp); } -static inline int audit_fd_pair(int fd1, int fd2) +static inline void audit_fd_pair(int fd1, int fd2) { if (unlikely(!audit_dummy_context())) - return __audit_fd_pair(fd1, fd2); - return 0; + __audit_fd_pair(fd1, fd2); } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) { @@ -537,7 +536,7 @@ extern int audit_signals; #define audit_ipc_set_perm(q,u,g,m) ((void)0) #define audit_bprm(p) ({ 0; }) #define audit_socketcall(n,a) ((void)0) -#define audit_fd_pair(n,a) ({ 0; }) +#define audit_fd_pair(n,a) ((void)0) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_set_macxattr(n) do { ; } while (0) #define audit_mq_open(o,m,a) ((void)0) -- cgit v1.2.3 From 57f71a0af4244d9ba3c0bce74b1d2e66e8d520bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jan 2009 14:52:57 -0500 Subject: sanitize audit_log_capset() * no allocations * return void * don't duplicate checked for dummy context Signed-off-by: Al Viro --- include/linux/audit.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index bd59cd1e3219..7ddcb6a29eb1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -457,7 +457,7 @@ extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); -extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); +extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -504,12 +504,11 @@ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, return 0; } -static inline int audit_log_capset(pid_t pid, const struct cred *new, +static inline void 
audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) - return __audit_log_capset(pid, new, old); - return 0; + __audit_log_capset(pid, new, old); } extern int audit_n_rules; @@ -544,7 +543,7 @@ extern int audit_signals; #define audit_mq_notify(d,n) ((void)0) #define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) -#define audit_log_capset(pid, ncr, ocr) ({ 0; }) +#define audit_log_capset(pid, ncr, ocr) ((void)0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 -- cgit v1.2.3 From 0590b9335a1c72a3f0defcc6231287f7817e07c8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 23:45:27 -0500 Subject: fixing audit rule ordering mess, part 1 Problem: ordering between the rules on exit chain is currently lost; all watch and inode rules are listed after everything else _and_ exit,never on one kind doesn't stop exit,always on another from being matched. Solution: assign priorities to rules, keep track of the current highest-priority matching rule and its result (always/never). Signed-off-by: Al Viro --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 7ddcb6a29eb1..5b47eeb00d53 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -373,6 +373,7 @@ struct audit_krule { struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ + u64 prio; }; struct audit_field { -- cgit v1.2.3 From e45aa212ea81d39b38ba158df344dc3a500153e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2008 01:17:50 -0500 Subject: audit rules ordering, part 2 Fix the actual rule listing; add per-type lists _not_ used for matching, with all exit,... sitting on one such list. 
Simplifies "do something for all rules" logics, while we are at it... Signed-off-by: Al Viro --- include/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 5b47eeb00d53..cc71fdb56ae2 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -373,6 +373,7 @@ struct audit_krule { struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ + struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; }; -- cgit v1.2.3 From 5af75d8d58d0f9f7b7c0515b35786b22892d5f12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Dec 2008 05:59:26 -0500 Subject: audit: validate comparison operations, store them in sane form Don't store the field->op in the messy (and very inconvenient for e.g. audit_comparator()) form; translate to dense set of values and do full validation of userland-submitted value while we are at it. ->audit_init_rule() and ->audit_match_rule() get new values now; in-tree instances updated. 
Signed-off-by: Al Viro --- include/linux/audit.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index cc71fdb56ae2..67e5dbfc2961 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -247,6 +247,18 @@ #define AUDIT_GREATER_THAN_OR_EQUAL (AUDIT_GREATER_THAN|AUDIT_EQUAL) #define AUDIT_OPERATORS (AUDIT_EQUAL|AUDIT_NOT_EQUAL|AUDIT_BIT_MASK) +enum { + Audit_equal, + Audit_not_equal, + Audit_bitmask, + Audit_bittest, + Audit_lt, + Audit_gt, + Audit_le, + Audit_ge, + Audit_bad +}; + /* Status symbols */ /* Mask values */ #define AUDIT_STATUS_ENABLED 0x0001 -- cgit v1.2.3 From 0a30c5cefa53cbac429dcb2de906c0637b646253 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 4 Jan 2009 12:00:47 -0800 Subject: spi.h uses/needs device.h Include header files as used/needed: In file included from drivers/leds/leds-dac124s085.c:16: include/linux/spi/spi.h:66: error: field 'dev' has incomplete type include/linux/spi/spi.h: In function 'to_spi_device': include/linux/spi/spi.h:100: warning: type defaults to 'int' in declaration of '__mptr' ... Signed-off-by: Randy Dunlap Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4be01bb44377..82229317753d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -19,6 +19,8 @@ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H +#include <linux/device.h> + /* * INTERFACES between SPI master-side drivers and SPI infrastructure. * (There's no SPI slave support for Linux yet...)
-- cgit v1.2.3 From c644f0e4b56f9a2fc066cd0d75a18074d130e4a3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 4 Jan 2009 12:00:48 -0800 Subject: fs: introduce bgl_lock_ptr() As suggested by Andreas Dilger, introduce a bgl_lock_ptr() helper in <linux/blockgroup_lock.h> and add separate sb_bgl_lock() helpers to filesystem specific header files to break the hidden dependency to struct ext[234]_sb_info. Also, while at it, convert the macros to static inlines to try to make up for all the times I broke Andrew Morton's tree. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blockgroup_lock.h | 7 +++++-- include/linux/ext2_fs_sb.h | 6 ++++++ include/linux/ext3_fs_sb.h | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blockgroup_lock.h b/include/linux/blockgroup_lock.h index 8607312983bd..e44b88ba552b 100644 --- a/include/linux/blockgroup_lock.h +++ b/include/linux/blockgroup_lock.h @@ -53,7 +53,10 @@ static inline void bgl_lock_init(struct blockgroup_lock *bgl) * The accessor is a macro so we can embed a blockgroup_lock into different * superblock types */ -#define sb_bgl_lock(sb, block_group) \ - (&(sb)->s_blockgroup_lock.locks[(block_group) & (NR_BG_LOCKS-1)].lock) +static inline spinlock_t * +bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group) +{ + return &bgl->locks[(block_group) & (NR_BG_LOCKS-1)].lock; +} #endif diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index f273415ab6f1..dc541f3653d1 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -108,4 +108,10 @@ struct ext2_sb_info { struct ext2_reserve_window_node s_rsv_window_head; }; +static inline spinlock_t * +sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) +{ + return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); +} + #endif /* _LINUX_EXT2_FS_SB */ diff --git a/include/linux/ext3_fs_sb.h
b/include/linux/ext3_fs_sb.h index b65f0288b842..e024e38248ff 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -83,4 +83,10 @@ struct ext3_sb_info { #endif }; +static inline spinlock_t * +sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) +{ + return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); +} + #endif /* _LINUX_EXT3_FS_SB */ -- cgit v1.2.3 From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 4 Jan 2009 12:00:53 -0800 Subject: fs: symlink write_begin allocation context fix With the write_begin/write_end aops, page_symlink was broken because it could no longer pass a GFP_NOFS type mask into the point where the allocations happened. They are done in write_begin, which would always assume that the filesystem can be entered from reclaim. This bug could cause filesystem deadlocks. The funny thing with having a gfp_t mask there is that it doesn't really allow the caller to arbitrarily tinker with the context in which it can be called. It couldn't ever be GFP_ATOMIC, for example, because it needs to take the page lock. The only thing any callers care about is __GFP_FS anyway, so turn that into a single flag. Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on this flag in their write_begin function. Change __grab_cache_page to accept a nofs argument as well, to honour that flag (while we're there, change the name to grab_cache_page_write_begin which is more instructive and does away with random leading underscores). This is really a more flexible way to go in the end anyway -- if a filesystem happens to want any extra allocations aside from the pagecache ones in its write_begin function, it may now use GFP_KERNEL (rather than GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs] [kosaki.motohiro@jp.fujitsu.com: fix fuse] Signed-off-by: Nick Piggin Reviewed-by: KOSAKI Motohiro Cc: [2.6.28.x] Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton [ Cleaned up the calling convention: just pass in the AOP flags untouched to the grab_cache_page_write_begin() function. That just simplifies everybody, and may even allow future expansion of the logic. - Linus ] Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 ++++- include/linux/pagemap.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2170ee21e18..f2a3010140e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -423,6 +423,9 @@ enum positive_aop_returns { #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct + * helper code (eg buffer layer) + * to clear GFP_FS from alloc */ /* * oh the beauties of C type declarations. 
@@ -2035,7 +2038,7 @@ extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask); + int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 709742be02f0..01ca0856caff 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, int tag, unsigned int nr_pages, struct page **pages); -struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index); +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags); /* * Returns locked page at given index in given cache, creating it if needed. -- cgit v1.2.3 From 099e657625e801adf82054c8050dde5aceb68452 Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Sun, 4 Jan 2009 12:00:54 -0800 Subject: rtc: add alarm/update irq interfaces Add standard interfaces for alarm/update irqs enabling. Drivers are no more required to implement equivalent ioctl code as rtc-dev will provide it. UIE emulation should now be handled correctly and will work even for those RTC drivers who cannot be configured to do both UIE and AIE. 
Signed-off-by: Alessandro Zummo Cc: David Brownell Cc: Atsushi Nemoto Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rtc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 91f597ad6acc..4046b75563c1 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -145,6 +145,8 @@ struct rtc_class_ops { int (*irq_set_state)(struct device *, int enabled); int (*irq_set_freq)(struct device *, int freq); int (*read_callback)(struct device *, int data); + int (*alarm_irq_enable)(struct device *, unsigned int enabled); + int (*update_irq_enable)(struct device *, unsigned int enabled); }; #define RTC_DEVICE_NAME_SIZE 20 @@ -181,7 +183,7 @@ struct rtc_device struct timer_list uie_timer; /* Those fields are protected by rtc->irq_lock */ unsigned int oldsecs; - unsigned int irq_active:1; + unsigned int uie_irq_active:1; unsigned int stop_uie_polling:1; unsigned int uie_task_active:1; unsigned int uie_timer_active:1; @@ -216,6 +218,10 @@ extern int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled); extern int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq); +extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled); +extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled); +extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, + unsigned int enabled); typedef struct rtc_task { void (*func)(void *private_data); -- cgit v1.2.3 From 088af9a6e05d51e7c3dc85d45d8b7a52c3ee08d7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Dec 2008 12:31:18 +0100 Subject: module: fix module loading failure of large kernel modules for parisc When creating the final layout of a kernel module in memory, allow the module loader to reserve some additional memory in front of a given section. 
This is currently only needed for the parisc port which needs to put the stub entries there to fulfill the 17/22bit PCREL relocations with large kernel modules like xfs. Signed-off-by: Helge Deller Signed-off-by: Rusty Russell (renamed fn) --- include/linux/moduleloader.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index eb1033957486..c1f40c2f7ffb 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -13,6 +13,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, char *secstrings, struct module *mod); +/* Additional bytes needed by arch in front of individual sections */ +unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); + /* Allocator used for allocating struct module, core sections and init sections. Returns NULL on failure. */ void *module_alloc(unsigned long size); -- cgit v1.2.3 From 9ea09af3bd3090e8349ca2899ca2011bd94cda85 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Dec 2008 12:36:30 +0100 Subject: stop_machine: introduce stop_machine_create/destroy. Introduce stop_machine_create/destroy. With this interface subsystems that need a non-failing stop_machine environment can create the stop_machine machine threads before actually calling stop_machine. When the threads aren't needed anymore they can be killed with stop_machine_destroy again. When stop_machine gets called and the threads aren't present they will be created and destroyed automatically. This restores the old behaviour of stop_machine. This patch also converts cpu hotplug to the new interface since it is special: cpu_down calls __stop_machine instead of stop_machine. However the kstop threads will only be created when stop_machine gets called. 
Changing the code so that the threads would be created automatically on __stop_machine is currently not possible: when __stop_machine gets called we hold cpu_add_remove_lock, which is the same lock that create_rt_workqueue would take. So the workqueue needs to be created before the cpu hotplug code locks cpu_add_remove_lock. Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell --- include/linux/stop_machine.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 74d59a641362..baba3a23a814 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -35,6 +35,24 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); * won't come or go while it's being called. Used by hotplug cpu. */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); + +/** + * stop_machine_create: create all stop_machine threads + * + * Description: This causes all stop_machine threads to be created before + * stop_machine actually gets called. This can be used by subsystems that + * need a non failing stop_machine infrastructure. + */ +int stop_machine_create(void); + +/** + * stop_machine_destroy: destroy all stop_machine threads + * + * Description: This causes all stop_machine threads which were created with + * stop_machine_create to be destroyed again. 
+ */ +void stop_machine_destroy(void); + #else static inline int stop_machine(int (*fn)(void *), void *data, @@ -46,5 +64,9 @@ static inline int stop_machine(int (*fn)(void *), void *data, local_irq_enable(); return ret; } + +static inline int stop_machine_create(void) { return 0; } +static inline void stop_machine_destroy(void) { } + #endif /* CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From 5d38a079ce3971f932bbdc0dc5b887806fabd5dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 Jan 2009 16:13:40 -0800 Subject: gro: Add page frag support This patch allows GRO to merge page frags (skb_shinfo(skb)->frags) in one skb, rather than using the less efficient frag_list. It also adds a new interface, napi_gro_frags to allow drivers to inject page frags directly into the stack without allocating an skb. This is intended to be the GRO equivalent for LRO's lro_receive_frags interface. The existing GSO interface can already handle page frags with or without an appended frag_list so nothing needs to be changed there. The merging itself is rather simple. We store any new frag entries after the last existing entry, without checking whether the first new entry can be merged with the last existing entry. Making this check would actually be easy but since no existing driver can produce contiguous frags anyway it would just be mental masturbation. If the total number of entries would exceed the capacity of a single skb, we simply resort to using frag_list as we do now. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41e1224651cf..c28bbba3c23d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -313,10 +313,11 @@ struct napi_struct { #ifdef CONFIG_NETPOLL spinlock_t poll_lock; int poll_owner; - struct net_device *dev; #endif + struct net_device *dev; struct list_head dev_list; struct sk_buff *gro_list; + struct sk_buff *skb; }; enum @@ -990,6 +991,9 @@ struct napi_gro_cb { /* Number of segments aggregated. */ int count; + + /* Free the skb? */ + int free; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) @@ -1011,6 +1015,14 @@ struct packet_type { struct list_head list; }; +struct napi_gro_fraginfo { + skb_frag_t frags[MAX_SKB_FRAGS]; + unsigned int nr_frags; + unsigned int ip_summed; + unsigned int len; + __wsum csum; +}; + #include #include @@ -1363,6 +1375,8 @@ extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern int napi_gro_frags(struct napi_struct *napi, + struct napi_gro_fraginfo *info); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); -- cgit v1.2.3 From 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 Dec 2008 15:15:42 +0000 Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #2] Fix a regression in cap_capable() due to: commit 5ff7711e635b32f0a1e558227d030c7e45b4a465 Author: David Howells Date: Wed Dec 31 02:52:28 2008 +0000 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the 
most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change splits capable() from has_capability() down into the commoncap and SELinux code. The capable() security op now only deals with the current process, and uses the current process's subjective creds. A new security op - task_capable() - is introduced that can check any task's objective creds. strictly the capable() security op is superfluous with the presence of the task_capable() op, however it should be faster to call the capable() op since two fewer arguments need be passed down through the various layers. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. 
* Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include #include #include #include #include #include #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem. 
If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 17 ++++++++++++++-- include/linux/security.h | 49 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index e22f48c2a46f..5b8a13214451 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. 
*/ -#define has_capability(t, cap) (security_capable((t), (cap)) == 0) -#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0) + +/** + * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) + * @t: The task in question + * @cap: The capability to be tested for + * + * Return true if the specified task has the given superior capability + * currently in effect, false if not, but don't write an audit message for the + * check. + * + * Note that this does not set PF_SUPERPRIV on the task. + */ +#define has_capability_noaudit(t, cap) \ + (security_task_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..76989b8bc34f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,7 +48,9 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap, int audit); +extern int cap_capable(int cap, int audit); +extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred, + int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1195,9 +1197,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the @tsk process has the @cap capability. + * Check whether the current process has the @cap capability in its + * subjective/effective credentials. + * @cap contains the capability . 
+ * @audit: Whether to write an audit message or not + * Return 0 if the capability is granted for @tsk. + * @task_capable: + * Check whether the @tsk process has the @cap capability in its + * objective/real credentials. * @tsk contains the task_struct for the process. + * @cred contains the credentials to use. * @cap contains the capability . + * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If @@ -1290,7 +1301,9 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap, int audit); + int (*capable) (int cap, int audit); + int (*task_capable) (struct task_struct *tsk, const struct cred *cred, + int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1556,8 +1569,9 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(struct task_struct *tsk, int cap); -int security_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(int cap); +int security_task_capable(struct task_struct *tsk, int cap); +int security_task_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1754,14 +1768,31 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(struct task_struct *tsk, int cap) +static inline int security_capable(int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); + return cap_capable(cap, 
SECURITY_CAP_AUDIT); } -static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +static inline int security_task_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); + int ret; + + rcu_read_lock(); + ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); + rcu_read_unlock(); + return ret; +} + +static inline +int security_task_capable_noaudit(struct task_struct *tsk, int cap) +{ + int ret; + + rcu_read_lock(); + ret = cap_task_capable(tsk, __task_cred(tsk), cap, + SECURITY_CAP_NOAUDIT); + rcu_read_unlock(); + return ret; } static inline int security_acct(struct file *file) -- cgit v1.2.3 From e9079cce201784632aed4b1a3121ee38c1ced0b6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 14 Oct 2008 14:43:29 +0100 Subject: GFS2: Support for FIEMAP ioctl This patch implements the FIEMAP ioctl for GFS2. We can use the generic code (aside from a lock order issue, solved as per Ted Tso's suggestion) for which I've introduced a new variant of the generic function. We also have one exception to deal with, namely stuffed files, so we do that "by hand", setting all the required flags. This has been tested with a modified (I could only find an old version) of Eric's test program, and appears to work correctly. This patch does not currently support FIEMAP of xattrs, but the plan is to add that feature at some future point. 
Signed-off-by: Steven Whitehouse Cc: Theodore Tso Cc: Eric Sandeen --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f2a3010140e3..e34bc6925fdf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2059,6 +2059,9 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); +extern int __generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block); extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block); -- cgit v1.2.3 From ea7d3fef4222cd98556a0b386598268d4dbf6670 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Jan 2009 13:03:02 -0800 Subject: rcu: eliminate synchronize_rcu_xxx macro Impact: cleanup Expand macro into two files. The synchronize_rcu_xxx macro is quite ugly and it's only used by two callers, so expand it instead. This makes this code easier to change. Signed-off-by: Andi Kleen Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1168fbcea8d4..921340a7b71c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -204,18 +204,6 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); -#define synchronize_rcu_xxx(name, func) \ -void name(void) \ -{ \ - struct rcu_synchronize rcu; \ - \ - init_completion(&rcu.completion); \ - /* Will wake me after RCU finished. */ \ - func(&rcu.head, wakeme_after_rcu); \ - /* Wait for it. */ \ - wait_for_completion(&rcu.completion); \ -} - /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. 
-- cgit v1.2.3 From a6037b61c2f5fc99c57c15b26d7cfa58bbb34008 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Jan 2009 11:28:22 +0100 Subject: hrtimer: fix recursion deadlock by re-introducing the softirq Impact: fix rare runtime deadlock There are a few sites that do: spin_lock_irq(&foo) hrtimer_start(&bar) __run_hrtimer(&bar) func() spin_lock(&foo) which obviously deadlocks. In order to avoid this, never call __run_hrtimer() from hrtimer_start*() context, but instead defer this to softirq context. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0702c4d7bdf0..2062833f5f7a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -253,7 +253,8 @@ enum BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; -- cgit v1.2.3 From c70f22d203fc02c805b6ed4a3483b740dc36786b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 5 Jan 2009 19:07:50 +0800 Subject: sched: clean up arch_reinit_sched_domains() - Make arch_reinit_sched_domains() static. It was exported to be used in s390, but now rebuild_sched_domains() is used instead. - Make it return void. 
Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..91207df702e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -912,7 +912,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new); -extern int arch_reinit_sched_domains(void); /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) -- cgit v1.2.3 From 922ab535bbe73975ce62f71ab9bf8ec9bce71c29 Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:13:58 +0000 Subject: [MTD] LPDDR QINFO records definitions This patch adds the structure declarations and macro definitions necessary for operations with QINFO. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/qinfo.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/mtd/qinfo.h (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h new file mode 100644 index 000000000000..7b3d487d8b3f --- /dev/null +++ b/include/linux/mtd/qinfo.h @@ -0,0 +1,91 @@ +#ifndef __LINUX_MTD_QINFO_H +#define __LINUX_MTD_QINFO_H + +#include <linux/mtd/map.h> +#include <linux/wait.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/flashchip.h> +#include <linux/mtd/partitions.h> + +/* lpddr_private describes lpddr flash chip in memory map + * @ManufactId - Chip Manufacture ID + * @DevId - Chip Device ID + * @qinfo - pointer to qinfo records describing the chip + * @numchips - number of chips including virual RWW partitions + * @chipshift - Chip/partiton size 2^chipshift + * @chips - per-chip data structure + */ +struct lpddr_private { + uint16_t ManufactId; + uint16_t DevId; + struct qinfo_chip *qinfo; + int numchips; +
unsigned long chipshift; + struct flchip chips[0]; +}; + +/* qinfo_query_info structure contains request information for + * each qinfo record + * @major - major number of qinfo record + * @major - minor number of qinfo record + * @id_str - descriptive string to access the record + * @desc - detailed description for the qinfo record + */ +struct qinfo_query_info { + uint8_t major; + uint8_t minor; + char *id_str; + char *desc; +}; + +/* + * qinfo_chip structure contains necessary qinfo records data + * @DevSizeShift - Device size 2^n bytes + * @BufSizeShift - Program buffer size 2^n bytes + * @TotalBlocksNum - Total number of blocks + * @UniformBlockSizeShift - Uniform block size 2^UniformBlockSizeShift bytes + * @HWPartsNum - Number of hardware partitions + * @SuspEraseSupp - Suspend erase supported + * @SingleWordProgTime - Single word program 2^SingleWordProgTime u-sec + * @ProgBufferTime - Program buffer write 2^ProgBufferTime u-sec + * @BlockEraseTime - Block erase 2^BlockEraseTime m-sec + */ +struct qinfo_chip { + /* General device info */ + uint16_t DevSizeShift; + uint16_t BufSizeShift; + /* Erase block information */ + uint16_t TotalBlocksNum; + uint16_t UniformBlockSizeShift; + /* Partition information */ + uint16_t HWPartsNum; + /* Optional features */ + uint16_t SuspEraseSupp; + /* Operation typical time */ + uint16_t SingleWordProgTime; + uint16_t ProgBufferTime; + uint16_t BlockEraseTime; +}; + +/* defines for fixup usage */ +#define LPDDR_MFR_ANY 0xffff +#define LPDDR_ID_ANY 0xffff +#define NUMONYX_MFGR_ID 0x0089 +#define R18_DEVICE_ID_1G 0x893c + +static inline map_word lpddr_build_cmd(u_long cmd, struct map_info *map) +{ + map_word val = { {0} }; + val.x[0] = cmd; + return val; +} + +#define CMD(x) lpddr_build_cmd(x, map) +#define CMDVAL(cmd) cmd.x[0] + +struct mtd_info *lpddr_cmdset(struct map_info *); + +#endif + -- cgit v1.2.3 From eb3db27507f74b99241abfa11824d8b6d92b84ef Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 
18:15:33 +0000 Subject: [MTD] LPDDR PFOW definition LPDDR chips use the PFOW window for sending commands, reading status and requesting capabilities. This file, pfow.h, contains definitions for the PFOW window fields, possible commands, error flags and some common helper functions to avoid code duplication. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/pfow.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 include/linux/mtd/pfow.h (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h new file mode 100644 index 000000000000..b730d4f84655 --- /dev/null +++ b/include/linux/mtd/pfow.h @@ -0,0 +1,159 @@ +/* Primary function overlay window definitions + * and service functions used by LPDDR chips + */ +#ifndef __LINUX_MTD_PFOW_H +#define __LINUX_MTD_PFOW_H + +#include <linux/mtd/qinfo.h> + +/* PFOW registers addressing */ +/* Address of symbol "P" */ +#define PFOW_QUERY_STRING_P 0x0000 +/* Address of symbol "F" */ +#define PFOW_QUERY_STRING_F 0x0002 +/* Address of symbol "O" */ +#define PFOW_QUERY_STRING_O 0x0004 +/* Address of symbol "W" */ +#define PFOW_QUERY_STRING_W 0x0006 +/* Identification info for LPDDR chip */ +#define PFOW_MANUFACTURER_ID 0x0020 +#define PFOW_DEVICE_ID 0x0022 +/* Address in PFOW where prog buffer can can be found */ +#define PFOW_PROGRAM_BUFFER_OFFSET 0x0040 +/* Size of program buffer in words */ +#define PFOW_PROGRAM_BUFFER_SIZE 0x0042 +/* Address command code register */ +#define PFOW_COMMAND_CODE 0x0080 +/* command data register */ +#define PFOW_COMMAND_DATA 0x0084 +/* command address register lower address bits */ +#define PFOW_COMMAND_ADDRESS_L 0x0088 +/* command address register upper address bits */ +#define PFOW_COMMAND_ADDRESS_H 0x008a +/* number of bytes to be proggrammed lower address bits */ +#define PFOW_DATA_COUNT_L 0x0090 +/* number of bytes to be proggrammed higher address bits */ +#define PFOW_DATA_COUNT_H
0x0092 +/* command execution register, the only possible value is 0x01 */ +#define PFOW_COMMAND_EXECUTE 0x00c0 +/* 0x01 should be written at this address to clear buffer */ +#define PFOW_CLEAR_PROGRAM_BUFFER 0x00c4 +/* device program/erase suspend register */ +#define PFOW_PROGRAM_ERASE_SUSPEND 0x00c8 +/* device status register */ +#define PFOW_DSR 0x00cc + +/* LPDDR memory device command codes */ +/* They are possible values of PFOW command code register */ +#define LPDDR_WORD_PROGRAM 0x0041 +#define LPDDR_BUFF_PROGRAM 0x00E9 +#define LPDDR_BLOCK_ERASE 0x0020 +#define LPDDR_LOCK_BLOCK 0x0061 +#define LPDDR_UNLOCK_BLOCK 0x0062 +#define LPDDR_READ_BLOCK_LOCK_STATUS 0x0065 +#define LPDDR_INFO_QUERY 0x0098 +#define LPDDR_READ_OTP 0x0097 +#define LPDDR_PROG_OTP 0x00C0 +#define LPDDR_RESUME 0x00D0 + +/* Defines possible value of PFOW command execution register */ +#define LPDDR_START_EXECUTION 0x0001 + +/* Defines possible value of PFOW program/erase suspend register */ +#define LPDDR_SUSPEND 0x0001 + +/* Possible values of PFOW device status register */ +/* access R - read; RC read & clearable */ +#define DSR_DPS (1<<1) /* RC; device protect status + * 0 - not protected 1 - locked */ +#define DSR_PSS (1<<2) /* R; program suspend status; + * 0-prog in progress/completed, + * 1- prog suspended */ +#define DSR_VPPS (1<<3) /* RC; 0-Vpp OK, * 1-Vpp low */ +#define DSR_PROGRAM_STATUS (1<<4) /* RC; 0-successful, 1-error */ +#define DSR_ERASE_STATUS (1<<5) /* RC; erase or blank check status; + * 0-success erase/blank check, + * 1 blank check error */ +#define DSR_ESS (1<<6) /* R; erase suspend status; + * 0-erase in progress/complete, + * 1 erase suspended */ +#define DSR_READY_STATUS (1<<7) /* R; Device status + * 0-busy, + * 1-ready */ +#define DSR_RPS (0x3<<8) /* RC; region program status + * 00 - Success, + * 01-re-program attempt in region with + * object mode data, + * 10-object mode program w attempt in + * region with control mode data + * 11-attempt to program invalid 
half + * with 0x41 command */ +#define DSR_AOS (1<<12) /* RC; 1- AO related failure */ +#define DSR_AVAILABLE (1<<15) /* R; Device availbility + * 1 - Device available + * 0 - not available */ + +/* The superset of all possible error bits in DSR */ +#define DSR_ERR 0x133A + +static inline void send_pfow_command(struct map_info *map, + unsigned long cmd_code, unsigned long adr, + unsigned long len, map_word *datum) +{ + int bits_per_chip = map_bankwidth(map) * 8; + int chipnum; + struct lpddr_private *lpddr = map->fldrv_priv; + chipnum = adr >> lpddr->chipshift; + + map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE); + map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)), + map->pfow_base + PFOW_COMMAND_ADDRESS_L); + map_write(map, CMD(adr>>bits_per_chip), + map->pfow_base + PFOW_COMMAND_ADDRESS_H); + if (len) { + map_write(map, CMD(len & ((1<<bits_per_chip) - 1)), + map->pfow_base + PFOW_DATA_COUNT_L); + map_write(map, CMD(len>>bits_per_chip), + map->pfow_base + PFOW_DATA_COUNT_H); + } + if (datum) + map_write(map, *datum, map->pfow_base + PFOW_COMMAND_DATA); + + /* Command execution start */ + map_write(map, CMD(LPDDR_START_EXECUTION), + map->pfow_base + PFOW_COMMAND_EXECUTE); +} + +static inline void print_drs_error(unsigned dsr) +{ + int prog_status = (dsr & DSR_RPS) >> 8; + + if (!(dsr & DSR_AVAILABLE)) + printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); + if (prog_status & 0x03) + printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " + "half with 41h command\n"); + else if (prog_status & 0x02) + printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt " + "in region with Control Mode data\n"); + else if (prog_status & 0x01) + printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region " + "with Object Mode data\n"); + if (!(dsr & DSR_READY_STATUS)) + printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n"); + if (dsr & DSR_ESS) + printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n"); + if (dsr & DSR_ERASE_STATUS) + printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n"); + if (dsr &
DSR_PROGRAM_STATUS) + printk(KERN_NOTICE"DSR.4: (1) Program Error\n"); + if (dsr & DSR_VPPS) + printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation " + "aborted\n"); + if (dsr & DSR_PSS) + printk(KERN_NOTICE"DSR.2: (1) Program suspended\n"); + if (dsr & DSR_DPS) + printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt " + "on locked block\n"); +} +#endif /* __LINUX_MTD_PFOW_H */ -- cgit v1.2.3 From d13e51e747fee301b404dffcf4a7e1bdc558969b Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:21:10 +0000 Subject: [MTD] LPDDR added new pfow_base parameter We need to supply an additional parameter to the mapping driver and tell LPDDR drivers where the PFOW window is in the chip mapping. This makes it necessary to extend the map_info structure. Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index aa30244492c6..b981b8772217 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -223,6 +223,7 @@ struct map_info { must leave it enabled. */ void (*set_vpp)(struct map_info *, int); + unsigned long pfow_base; unsigned long map_priv_1; unsigned long map_priv_2; void *fldrv_priv; -- cgit v1.2.3 From d81408304b06a71c28417445202af9cd6673168d Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 16 Dec 2008 18:22:39 +0000 Subject: [MTD] LPDDR extended physmap driver to support LPDDR flash Physmap is a generic map driver for different platforms and flash types. We added support for LPDDR to physmap. All changes here are related to the introduction of the new pfow_base parameter. This parameter is valid for LPDDR chips only. 
Signed-off-by: Alexey Korolev Acked-by: Jared Hulbert Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index c8e63a5ee72e..76f7cabf07d3 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -24,6 +24,7 @@ struct physmap_flash_data { unsigned int width; void (*set_vpp)(struct map_info *, int); unsigned int nr_parts; + unsigned int pfow_base; struct mtd_partition *parts; }; -- cgit v1.2.3 From be92d7af38fb8a91f8575ab2272e00f2e51667ff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 5 Jan 2009 14:34:42 +0100 Subject: genirq: provide irq_to_desc() to non-genirq architectures too Impact: build fix on non-genirq architectures Sam Ravnborg reported this build failure on sparc32 allmodconfig, the GPIO drivers assume the presence of irq_to_desc(): drivers/gpio/gpiolib.c: In function `gpiolib_dbg_show': drivers/gpio/gpiolib.c:1146: error: implicit declaration of function 'irq_to_desc' Add it in the !genirq case too. 
Reported-by: Sam Ravnborg Signed-off-by: Ingo Molnar Tested-by: Sam Ravnborg --- include/linux/irqnr.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 5504a5c97836..86af92e9e84c 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -8,7 +8,12 @@ #ifndef CONFIG_GENERIC_HARDIRQS #include <asm/irq.h> -# define nr_irqs NR_IRQS + +/* + * Wrappers for non-genirq architectures: + */ +#define nr_irqs NR_IRQS +#define irq_to_desc(irq) (&irq_desc[irq]) # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -- cgit v1.2.3 From c42aa775cc8a8ca558db0cc75979fb8e16667447 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 20 Nov 2008 15:59:12 +0100 Subject: atmel-mci: move atmel-mci.h file to include/linux Needed to use the atmel-mci driver in an architecture-independent manner. Signed-off-by: Nicolas Ferre Signed-off-by: Haavard Skinnemoen --- include/linux/atmel-mci.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 include/linux/atmel-mci.h (limited to 'include/linux') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h new file mode 100644 index 000000000000..2a2213eefd85 --- /dev/null +++ b/include/linux/atmel-mci.h @@ -0,0 +1,39 @@ +#ifndef __LINUX_ATMEL_MCI_H +#define __LINUX_ATMEL_MCI_H + +#define ATMEL_MCI_MAX_NR_SLOTS 2 + +struct dma_slave; + +/** + * struct mci_slot_pdata - board-specific per-slot configuration + * @bus_width: Number of data lines wired up to the slot + * @detect_pin: GPIO pin wired to the card detect switch + * @wp_pin: GPIO pin wired to the write protect sensor + * + * If a given slot is not present on the board, @bus_width should be + * set to 0. The other fields are ignored in this case. + * + * Any pins that aren't available should be set to a negative value. 
+ * + * Note that support for multiple slots is experimental -- some cards + * might get upset if we don't get the clock management exactly right. + * But in most cases, it should work just fine. + */ +struct mci_slot_pdata { + unsigned int bus_width; + int detect_pin; + int wp_pin; +}; + +/** + * struct mci_platform_data - board-specific MMC/SDcard configuration + * @dma_slave: DMA slave interface to use in data transfers, or NULL. + * @slot: Per-slot configuration data. + */ +struct mci_platform_data { + struct dma_slave *dma_slave; + struct mci_slot_pdata slot[ATMEL_MCI_MAX_NR_SLOTS]; +}; + +#endif /* __LINUX_ATMEL_MCI_H */ -- cgit v1.2.3 From 74f783af95c982aef6d3a1415275650dcf511666 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 19 Aug 2008 14:51:22 +0200 Subject: quota: Add callbacks for allocating and destroying dquot structures Some filesystems would like to keep private information together with each dquot. Add callbacks alloc_dquot and destroy_dquot allowing filesystem to allocate larger dquots from their private slab in a similar fashion we currently allocate inodes. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 40401b554484..3ce708c2cb3c 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -292,6 +292,8 @@ struct dquot_operations { int (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ + struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ + void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ -- cgit v1.2.3 From 12095460f7f315f8ef67a55b2194195d325d48d7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 14:45:12 +0200 Subject: quota: Increase size of variables for limits and inode usage So far quota was fine with quota block limits and inode limits/numbers in a 32-bit type. Now, with the rapid increase in storage sizes, requests are coming in to be able to handle quota limits above 4TB / more than 2^32 inodes. So bump up sizes of types in mem_dqblk structure to 64 bits to be able to handle this. Also update inode allocation / checking functions to use qsize_t and make global structure keep quota limits in bytes so that things are consistent. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 28 ++++++++++++---------------- include/linux/quotaops.h | 4 ++-- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 3ce708c2cb3c..9ea468363f9f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -39,15 +39,6 @@ #define __DQUOT_VERSION__ "dquot_6.5.1" #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 -/* Size of blocks in which are counted size limits */ -#define QUOTABLOCK_BITS 10 -#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) - -/* Conversion routines from and to quota blocks */ -#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10)) -#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10)) -#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS) - #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */ @@ -80,6 +71,11 @@ #define Q_GETQUOTA 0x800007 /* get user quota structure */ #define Q_SETQUOTA 0x800008 /* set user quota structure */ +/* Size of block in which space limits are passed through the quota + * interface */ +#define QIF_DQBLKSIZE_BITS 10 +#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS) + /* * Quota structure used for communication with userspace via quotactl * Following flags are used to specify which fields are valid @@ -187,12 +183,12 @@ extern spinlock_t dq_data_lock; * Data for one user/group kept in memory */ struct mem_dqblk { - __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ - __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ + qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ + qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ qsize_t dqb_curspace; /* current used space */ - __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ - __u32 dqb_isoftlimit; /* preferred inode limit */ - __u32 dqb_curinodes; /* current # allocated inodes */ + qsize_t 
dqb_ihardlimit; /* absolute limit on allocated inodes */ + qsize_t dqb_isoftlimit; /* preferred inode limit */ + qsize_t dqb_curinodes; /* current # allocated inodes */ time_t dqb_btime; /* time limit for excessive disk use */ time_t dqb_itime; /* time limit for excessive inode use */ }; @@ -287,9 +283,9 @@ struct dquot_operations { int (*initialize) (struct inode *, int); int (*drop) (struct inode *); int (*alloc_space) (struct inode *, qsize_t, int); - int (*alloc_inode) (const struct inode *, unsigned long); + int (*alloc_inode) (const struct inode *, qsize_t); int (*free_space) (struct inode *, qsize_t); - int (*free_inode) (const struct inode *, unsigned long); + int (*free_inode) (const struct inode *, qsize_t); int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a558a4c1d35a..adcc7ba3accb 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -26,10 +26,10 @@ int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -int dquot_alloc_inode(const struct inode *inode, unsigned long number); +int dquot_alloc_inode(const struct inode *inode, qsize_t number); int dquot_free_space(struct inode *inode, qsize_t number); -int dquot_free_inode(const struct inode *inode, unsigned long number); +int dquot_free_inode(const struct inode *inode, qsize_t number); int dquot_transfer(struct inode *inode, struct iattr *iattr); int dquot_commit(struct dquot *dquot); -- cgit v1.2.3 From e4bc7b4b7ff783779b6928d55a9308910bf180a3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 16:21:01 +0200 Subject: quota: Make _SUSPENDED just a flag Upto now, DQUOT_USR_SUSPENDED behaved like a state - i.e., either quota was enabled or 
suspended or none. Now allowed states are 0, ENABLED, ENABLED | SUSPENDED. This will be useful later when we implement separate enabling of quota usage tracking and limits enforcement because we need to keep track of a state which has been suspended. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quotaops.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index adcc7ba3accb..ffd97071cd1e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -67,8 +67,10 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) static inline int sb_has_quota_enabled(struct super_block *sb, int type) { if (type == USRQUOTA) - return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED; - return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED; + return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) + && !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED); + return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED) + && !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED); } static inline int sb_any_quota_enabled(struct super_block *sb) -- cgit v1.2.3 From f55abc0fb9c3189de3da829adf3220322c0da43e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 17:50:32 +0200 Subject: quota: Allow to separately enable quota accounting and enforcing limits Split DQUOT_USR_ENABLED (and DQUOT_GRP_ENABLED) into DQUOT_USR_USAGE_ENABLED and DQUOT_USR_LIMITS_ENABLED. This way we are able to separately enable / disable whether we should: 1) ignore quotas completely 2) just keep uptodate information about usage 3) actually enforce quota limits This is going to be useful when quota is treated as filesystem metadata - we then want to keep quota information uptodate all the time and just enable / disable limits enforcement. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 30 +++++++++++++--- include/linux/quotaops.h | 91 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 97 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 9ea468363f9f..93717abcd35b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -318,12 +318,34 @@ struct quota_format_type { struct quota_format_type *qf_next; }; -#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ -#define DQUOT_USR_SUSPENDED 0x04 /* User diskquotas are off, but +/* Quota state flags - they actually come in two flavors - for users and groups */ +enum { + _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ + _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */ + _DQUOT_SUSPENDED, /* User diskquotas are off, but * we have necessary info in * memory to turn them on */ -#define DQUOT_GRP_SUSPENDED 0x08 /* The same for group quotas */ + _DQUOT_STATE_FLAGS +}; +#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED) +#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED) +#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED) +#define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ + DQUOT_SUSPENDED) + +static inline unsigned int dquot_state_flag(unsigned int flags, int type) +{ + if (type == USRQUOTA) + return flags; + return flags << _DQUOT_STATE_FLAGS; +} + +static inline unsigned int dquot_generic_flag(unsigned int flags, int type) +{ + if (type == USRQUOTA) + return flags; + return flags >> _DQUOT_STATE_FLAGS; +} struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index ffd97071cd1e..3b3346fa657c 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -40,11 +40,14 @@ int dquot_mark_dquot_dirty(struct dquot 
*dquot); int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount); +int vfs_quota_enable(struct inode *inode, int type, int format_id, + unsigned int flags); int vfs_quota_on_path(struct super_block *sb, int type, int format_id, struct path *path); int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int vfs_quota_off(struct super_block *sb, int type, int remount); +int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags); int vfs_quota_sync(struct super_block *sb, int type); int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); @@ -64,26 +67,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) * Functions for checking status of quota */ -static inline int sb_has_quota_enabled(struct super_block *sb, int type) +static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) { - if (type == USRQUOTA) - return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) - && !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED); - return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED) - && !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED); + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_USAGE_ENABLED, type); } -static inline int sb_any_quota_enabled(struct super_block *sb) +static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) { - return sb_has_quota_enabled(sb, USRQUOTA) || - sb_has_quota_enabled(sb, GRPQUOTA); + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_LIMITS_ENABLED, type); } static inline int sb_has_quota_suspended(struct super_block *sb, int type) { - if (type == USRQUOTA) - return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED; - return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED; + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_SUSPENDED, type); } static inline int sb_any_quota_suspended(struct super_block *sb) @@ -92,6 
+91,34 @@ static inline int sb_any_quota_suspended(struct super_block *sb) sb_has_quota_suspended(sb, GRPQUOTA); } +/* Does kernel know about any quota information for given sb + type? */ +static inline int sb_has_quota_loaded(struct super_block *sb, int type) +{ + /* Currently if anything is on, then quota usage is on as well */ + return sb_has_quota_usage_enabled(sb, type); +} + +static inline int sb_any_quota_loaded(struct super_block *sb) +{ + return sb_has_quota_loaded(sb, USRQUOTA) || + sb_has_quota_loaded(sb, GRPQUOTA); +} + +static inline int sb_has_quota_active(struct super_block *sb, int type) +{ + return sb_has_quota_loaded(sb, type) && + !sb_has_quota_suspended(sb, type); +} + +static inline int sb_any_quota_active(struct super_block *sb) +{ + return sb_has_quota_active(sb, USRQUOTA) || + sb_has_quota_active(sb, GRPQUOTA); +} + +/* For backward compatibility until we remove all users */ +#define sb_any_quota_enabled(sb) sb_any_quota_active(sb) + /* * Operations supported for diskquotas. 
*/ @@ -106,7 +133,7 @@ extern struct quotactl_ops vfs_quotactl_ops; static inline void vfs_dq_init(struct inode *inode) { BUG_ON(!inode->i_sb); - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) + if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) inode->i_sb->dq_op->initialize(inode, -1); } @@ -114,7 +141,7 @@ static inline void vfs_dq_init(struct inode *inode) * a transaction (deadlocks possible otherwise) */ static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { /* Used space is updated in alloc_space() */ if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) return 1; @@ -134,7 +161,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { /* Used space is updated in alloc_space() */ if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) return 1; @@ -154,7 +181,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) static inline int vfs_dq_alloc_inode(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { vfs_dq_init(inode); if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) return 1; @@ -164,7 +191,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode) static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) + if (sb_any_quota_active(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); else inode_sub_bytes(inode, nr); @@ -178,7 +205,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) static inline void vfs_dq_free_inode(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) + if (sb_any_quota_active(inode->i_sb)) 
inode->i_sb->dq_op->free_inode(inode, 1); } @@ -199,12 +226,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount) #else -static inline int sb_has_quota_enabled(struct super_block *sb, int type) +static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) { return 0; } -static inline int sb_any_quota_enabled(struct super_block *sb) +static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) { return 0; } @@ -219,6 +246,30 @@ static inline int sb_any_quota_suspended(struct super_block *sb) return 0; } +/* Does kernel know about any quota information for given sb + type? */ +static inline int sb_has_quota_loaded(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_loaded(struct super_block *sb) +{ + return 0; +} + +static inline int sb_has_quota_active(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_active(struct super_block *sb) +{ + return 0; +} + +/* For backward compatibility until we remove all users */ +#define sb_any_quota_enabled(sb) sb_any_quota_active(sb) + /* * NO-OP when quota not configured. */ -- cgit v1.2.3 From dcb30695f2cac86b71417629a6fe8042b4fe2ab2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 18:30:40 +0200 Subject: quota: Remove compatibility function sb_any_quota_enabled() Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quotaops.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 3b3346fa657c..e840ca523175 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -116,9 +116,6 @@ static inline int sb_any_quota_active(struct super_block *sb) sb_has_quota_active(sb, GRPQUOTA); } -/* For backward compatibility until we remove all users */ -#define sb_any_quota_enabled(sb) sb_any_quota_active(sb) - /* * Operations supported for diskquotas. 
*/ @@ -267,9 +264,6 @@ static inline int sb_any_quota_active(struct super_block *sb) return 0; } -/* For backward compatibility until we remove all users */ -#define sb_any_quota_enabled(sb) sb_any_quota_active(sb) - /* * NO-OP when quota not configured. */ -- cgit v1.2.3 From ca785ec66b991e9ca74dd9840fc014487ad095e1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 30 Sep 2008 17:53:37 +0200 Subject: quota: Introduce DQUOT_QUOTA_SYS_FILE flag If filesystem can handle quota files as system files hidden from users, we can skip a lot of cache invalidation, syncing, inode flags setting etc. when turning quotas on, off and quota_sync. Allow filesystem to indicate that it is hiding quota files from users by DQUOT_QUOTA_SYS_FILE flag. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 93717abcd35b..80b8807b4988 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -332,6 +332,13 @@ enum { #define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED) #define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ DQUOT_SUSPENDED) +/* Other quota flags */ +#define DQUOT_QUOTA_SYS_FILE (1 << 6) /* Quota file is a special + * system file and user cannot + * touch it. Filesystem is + * responsible for setting + * S_NOQUOTA, S_NOATIME flags + */ static inline unsigned int dquot_state_flag(unsigned int flags, int type) { -- cgit v1.2.3 From cf770c137122b78470a67ebd5498947869a09197 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 21 Sep 2008 23:17:53 +0200 Subject: quota: Move quotaio_v[12].h from include/linux/ to fs/ Since these include files are used only by implementation of quota formats, there's no need to have them in include/linux/. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/Kbuild | 2 -- include/linux/quotaio_v1.h | 33 ------------------- include/linux/quotaio_v2.h | 79 ---------------------------------------------- 3 files changed, 114 deletions(-) delete mode 100644 include/linux/quotaio_v1.h delete mode 100644 include/linux/quotaio_v2.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 95ac82340c3b..900a787cbae9 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -134,8 +134,6 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h -header-y += quotaio_v1.h -header-y += quotaio_v2.h header-y += radeonfb.h header-y += raw.h header-y += resource.h diff --git a/include/linux/quotaio_v1.h b/include/linux/quotaio_v1.h deleted file mode 100644 index 746654b5de70..000000000000 --- a/include/linux/quotaio_v1.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _LINUX_QUOTAIO_V1_H -#define _LINUX_QUOTAIO_V1_H - -#include - -/* - * The following constants define the amount of time given a user - * before the soft limits are treated as hard limits (usually resulting - * in an allocation failure). The timer is started when the user crosses - * their soft limit, it is reset when they go below their soft limit. - */ -#define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */ -#define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */ - -/* - * The following structure defines the format of the disk quota file - * (as it appears on disk) - the file is an array of these structures - * indexed by user or group number. 
- */ -struct v1_disk_dqblk { - __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ - __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ - __u32 dqb_curblocks; /* current block count */ - __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ - __u32 dqb_isoftlimit; /* preferred inode limit */ - __u32 dqb_curinodes; /* current # allocated inodes */ - time_t dqb_btime; /* time limit for excessive disk use */ - time_t dqb_itime; /* time limit for excessive inode use */ -}; - -#define v1_dqoff(UID) ((loff_t)((UID) * sizeof (struct v1_disk_dqblk))) - -#endif /* _LINUX_QUOTAIO_V1_H */ diff --git a/include/linux/quotaio_v2.h b/include/linux/quotaio_v2.h deleted file mode 100644 index 303d7cbe30d4..000000000000 --- a/include/linux/quotaio_v2.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Definitions of structures for vfsv0 quota format - */ - -#ifndef _LINUX_QUOTAIO_V2_H -#define _LINUX_QUOTAIO_V2_H - -#include -#include - -/* - * Definitions of magics and versions of current quota files - */ -#define V2_INITQMAGICS {\ - 0xd9c01f11, /* USRQUOTA */\ - 0xd9c01927 /* GRPQUOTA */\ -} - -#define V2_INITQVERSIONS {\ - 0, /* USRQUOTA */\ - 0 /* GRPQUOTA */\ -} - -/* - * The following structure defines the format of the disk quota file - * (as it appears on disk) - the file is a radix tree whose leaves point - * to blocks of these structures. 
- */ -struct v2_disk_dqblk { - __le32 dqb_id; /* id this quota applies to */ - __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */ - __le32 dqb_isoftlimit; /* preferred inode limit */ - __le32 dqb_curinodes; /* current # allocated inodes */ - __le32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */ - __le32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */ - __le64 dqb_curspace; /* current space occupied (in bytes) */ - __le64 dqb_btime; /* time limit for excessive disk use */ - __le64 dqb_itime; /* time limit for excessive inode use */ -}; - -/* - * Here are header structures as written on disk and their in-memory copies - */ -/* First generic header */ -struct v2_disk_dqheader { - __le32 dqh_magic; /* Magic number identifying file */ - __le32 dqh_version; /* File version */ -}; - -/* Header with type and version specific information */ -struct v2_disk_dqinfo { - __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */ - __le32 dqi_igrace; /* Time before inode soft limit becomes hard limit */ - __le32 dqi_flags; /* Flags for quotafile (DQF_*) */ - __le32 dqi_blocks; /* Number of blocks in file */ - __le32 dqi_free_blk; /* Number of first free block in the list */ - __le32 dqi_free_entry; /* Number of block with at least one free entry */ -}; - -/* - * Structure of header of block with quota structures. 
It is padded to 16 bytes so - * there will be space for exactly 21 quota-entries in a block - */ -struct v2_disk_dqdbheader { - __le32 dqdh_next_free; /* Number of next block with free entry */ - __le32 dqdh_prev_free; /* Number of previous block with free entry */ - __le16 dqdh_entries; /* Number of valid entries in block */ - __le16 dqdh_pad1; - __le32 dqdh_pad2; -}; - -#define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */ -#define V2_DQBLKSIZE_BITS 10 -#define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */ -#define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */ -#define V2_DQTREEDEPTH 4 /* Depth of quota tree */ -#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */ - -#endif /* _LINUX_QUOTAIO_V2_H */ -- cgit v1.2.3 From 1ccd14b9c271c1ac6eec5c5ec5def433100e7248 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Sep 2008 05:54:49 +0200 Subject: quota: Split off quota tree handling into a separate file There is going to be a new version of quota format having 64-bit quota limits and a new quota format for OCFS2. They are both going to use the same tree structure as VFSv0 quota format. So split out tree handling into a separate file and make size of leaf blocks, amount of space usable in each block (needed for checksumming) and structures contained in them configurable so that the code can be shared. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/dqblk_qtree.h | 56 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dqblk_v2.h | 19 +++++++-------- 2 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 include/linux/dqblk_qtree.h (limited to 'include/linux') diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h new file mode 100644 index 000000000000..82a16527b367 --- /dev/null +++ b/include/linux/dqblk_qtree.h @@ -0,0 +1,56 @@ +/* + * Definitions of structures and functions for quota formats using trie + */ + +#ifndef _LINUX_DQBLK_QTREE_H +#define _LINUX_DQBLK_QTREE_H + +#include + +/* Numbers of blocks needed for updates - we count with the smallest + * possible block size (1024) */ +#define QTREE_INIT_ALLOC 4 +#define QTREE_INIT_REWRITE 2 +#define QTREE_DEL_ALLOC 0 +#define QTREE_DEL_REWRITE 6 + +struct dquot; + +/* Operations */ +struct qtree_fmt_operations { + void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */ + void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */ + int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? 
*/ +}; + +/* Inmemory copy of version specific information */ +struct qtree_mem_dqinfo { + struct super_block *dqi_sb; /* Sb quota is on */ + int dqi_type; /* Quota type */ + unsigned int dqi_blocks; /* # of blocks in quota file */ + unsigned int dqi_free_blk; /* First block in list of free blocks */ + unsigned int dqi_free_entry; /* First block with free entry */ + unsigned int dqi_blocksize_bits; /* Block size of quota file */ + unsigned int dqi_entry_size; /* Size of quota entry in quota file */ + unsigned int dqi_usable_bs; /* Space usable in block for quota data */ + unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */ + struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */ +}; + +int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk); +static inline int qtree_depth(struct qtree_mem_dqinfo *info) +{ + unsigned int epb = info->dqi_usable_bs >> 2; + unsigned long long entries = epb; + int i; + + for (i = 1; entries < (1ULL << 32); i++) + entries *= epb; + return i; +} + +#endif /* _LINUX_DQBLK_QTREE_H */ diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h index 4f853322cb7f..e5e22a787d58 100644 --- a/include/linux/dqblk_v2.h +++ b/include/linux/dqblk_v2.h @@ -1,26 +1,23 @@ /* - * Definitions of structures for vfsv0 quota format + * Definitions for vfsv0 quota format */ #ifndef _LINUX_DQBLK_V2_H #define _LINUX_DQBLK_V2_H -#include +#include -/* id numbers of quota format */ +/* Id number of quota format */ #define QFMT_VFS_V0 2 /* Numbers of blocks needed for updates */ -#define V2_INIT_ALLOC 4 -#define V2_INIT_REWRITE 2 -#define V2_DEL_ALLOC 0 -#define V2_DEL_REWRITE 6 +#define V2_INIT_ALLOC 
QTREE_INIT_ALLOC +#define V2_INIT_REWRITE QTREE_INIT_REWRITE +#define V2_DEL_ALLOC QTREE_DEL_ALLOC +#define V2_DEL_REWRITE QTREE_DEL_REWRITE -/* Inmemory copy of version specific information */ struct v2_mem_dqinfo { - unsigned int dqi_blocks; - unsigned int dqi_free_blk; - unsigned int dqi_free_entry; + struct qtree_mem_dqinfo i; }; #endif /* _LINUX_DQBLK_V2_H */ -- cgit v1.2.3 From e3d4d56b9715e40ded2a84d0d4fa7f3b6c58983c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Oct 2008 18:44:14 +0200 Subject: quota: Convert union in mem_dqinfo to a pointer Coming quota support for OCFS2 is going to need quite a bit of additional per-sb quota information. Moreover having fs.h include all the types needed for this structure would be a pain in the a**. So remove the union from mem_dqinfo and add a private pointer for filesystem's use. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/dqblk_v1.h | 4 ---- include/linux/dqblk_v2.h | 4 ---- include/linux/quota.h | 5 +---- 3 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h index 57f1250d5a52..9cea901f5bba 100644 --- a/include/linux/dqblk_v1.h +++ b/include/linux/dqblk_v1.h @@ -17,8 +17,4 @@ #define V1_DEL_ALLOC 0 #define V1_DEL_REWRITE 2 -/* Special information about quotafile */ -struct v1_mem_dqinfo { -}; - #endif /* _LINUX_DQBLK_V1_H */ diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h index e5e22a787d58..ff8af1b4bda7 100644 --- a/include/linux/dqblk_v2.h +++ b/include/linux/dqblk_v2.h @@ -16,8 +16,4 @@ #define V2_DEL_ALLOC QTREE_DEL_ALLOC #define V2_DEL_REWRITE QTREE_DEL_REWRITE -struct v2_mem_dqinfo { - struct qtree_mem_dqinfo i; -}; - #endif /* _LINUX_DQBLK_V2_H */ diff --git a/include/linux/quota.h b/include/linux/quota.h index 80b8807b4988..e51dfdc0aef0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -208,10 +208,7 @@ struct mem_dqinfo { unsigned int dqi_igrace; qsize_t 
dqi_maxblimit; qsize_t dqi_maxilimit; - union { - struct v1_mem_dqinfo v1_i; - struct v2_mem_dqinfo v2_i; - } u; + void *dqi_priv; }; struct super_block; -- cgit v1.2.3 From db49d2df489f727096438706a5428115e84a3f0d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2008 18:21:39 +0200 Subject: quota: Allow negative usage of space and inodes For clustered filesystems, it can happen that space / inode usage goes negative temporarily (because some node is allocating while another node is freeing and they are not completely in sync). So let quota code allow this and change qsize_t to a signed type so that we don't underflow the variables. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index e51dfdc0aef0..75bf761caef2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -168,7 +168,7 @@ enum { #include typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ -typedef __u64 qsize_t; /* Type in which we store sizes */ +typedef long long qsize_t; /* Type in which we store sizes */ extern spinlock_t dq_data_lock; @@ -336,6 +336,7 @@ enum { * responsible for setting * S_NOQUOTA, S_NOATIME flags */ +#define DQUOT_NEGATIVE_USAGE (1 << 7) /* Allow negative quota usage */ static inline unsigned int dquot_state_flag(unsigned int flags, int type) { -- cgit v1.2.3 From 4d59bce4f9eaf26d6d9046b56a2f1c0c7f20981d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Oct 2008 16:48:10 +0200 Subject: quota: Keep which entries were set by SETQUOTA quotactl Quota in a clustered environment needs to synchronize quota information among cluster nodes. This means we have to occasionally update some information in dquot from disk / network. On the other hand we have to be careful not to overwrite changes administrator did via SETQUOTA.
So indicate in dquot->dq_flags which entries have been set by SETQUOTA and quota format can clear these flags when it properly propagated the changes. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 75bf761caef2..6d98885c16da 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -80,12 +80,21 @@ * Quota structure used for communication with userspace via quotactl * Following flags are used to specify which fields are valid */ -#define QIF_BLIMITS 1 -#define QIF_SPACE 2 -#define QIF_ILIMITS 4 -#define QIF_INODES 8 -#define QIF_BTIME 16 -#define QIF_ITIME 32 +enum { + QIF_BLIMITS_B = 0, + QIF_SPACE_B, + QIF_ILIMITS_B, + QIF_INODES_B, + QIF_BTIME_B, + QIF_ITIME_B, +}; + +#define QIF_BLIMITS (1 << QIF_BLIMITS_B) +#define QIF_SPACE (1 << QIF_SPACE_B) +#define QIF_ILIMITS (1 << QIF_ILIMITS_B) +#define QIF_INODES (1 << QIF_INODES_B) +#define QIF_BTIME (1 << QIF_BTIME_B) +#define QIF_ITIME (1 << QIF_ITIME_B) #define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS) #define QIF_USAGE (QIF_SPACE | QIF_INODES) #define QIF_TIMES (QIF_BTIME | QIF_ITIME) @@ -242,6 +251,11 @@ extern struct dqstats dqstats; #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ +#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ + * for the mask of entries set via SETQUOTA\ + * quotactl. They are set under dq_data_lock\ + * and the quota format handling dquot can\ + * clear them when it sees fit. 
*/ struct dquot { struct hlist_node dq_hash; /* Hash list in memory */ -- cgit v1.2.3 From 571b46e40bebb0d57130ca24c4a84dfd553adb91 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2008 09:17:52 +0100 Subject: quota: Update version number Increase reported version number of quota support since quota core has changed significantly. Also remove __DQUOT_NUM_VERSION__ since nobody uses it. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quota.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 6d98885c16da..ec82beb10424 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -36,8 +36,7 @@ #include #include -#define __DQUOT_VERSION__ "dquot_6.5.1" -#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 +#define __DQUOT_VERSION__ "dquot_6.5.2" #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ -- cgit v1.2.3 From 3d9ea253a0e73dccaa869888ec2ceb17ea76c810 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 10 Oct 2008 16:12:23 +0200 Subject: quota: Add helpers to allow ocfs2 specific quota initialization, freeing and recovery OCFS2 needs to peek whether quota structure is already in memory so that it can avoid expensive cluster locking in that case. Similarly when freeing dquots, it checks whether it is the last quota structure user or not. Finally, it needs to get reference to dquot structure for specified id and quota type when recovering quota file after crash. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quotaops.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e840ca523175..e3a10272d471 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -24,6 +24,10 @@ void sync_dquots(struct super_block *sb, int type); int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); +int dquot_drop_locked(struct inode *inode); +struct dquot *dqget(struct super_block *sb, unsigned int id, int type); +void dqput(struct dquot *dquot); +int dquot_is_cached(struct super_block *sb, unsigned int id, int type); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); int dquot_alloc_inode(const struct inode *inode, qsize_t number); -- cgit v1.2.3 From 12c77527e4138bc3b17d17b0e0c909e4fc84924f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 20 Oct 2008 17:05:00 +0200 Subject: quota: Implement function for scanning active dquots OCFS2 needs to scan all active dquots once in a while and sync quota information among cluster nodes. Provide a helper function for it so that it does not have to reimplement internally a list which VFS already has. Moreover this function is probably going to be useful for other clustered filesystems if they decide to use VFS quotas. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quotaops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e3a10272d471..f4913948c305 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -28,6 +28,9 @@ int dquot_drop_locked(struct inode *inode); struct dquot *dqget(struct super_block *sb, unsigned int id, int type); void dqput(struct dquot *dquot); int dquot_is_cached(struct super_block *sb, unsigned int id, int type); +int dquot_scan_active(struct super_block *sb, + int (*fn)(struct dquot *dquot, unsigned long priv), + unsigned long priv); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); int dquot_alloc_inode(const struct inode *inode, qsize_t number); -- cgit v1.2.3 From e97fcd95a4778a8caf1980c6c72fdf68185a0838 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 18 Nov 2008 17:15:24 -0800 Subject: jbd2: Add BH_JBDPrivateStart Add this so that file systems using JBD2 can safely allocate unused b_state bits. In this case, we add it so that Ocfs2 can define a single bit for tracking the validation state of a buffer. 
Acked-by: "Theodore Ts'o" Signed-off-by: Mark Fasheh --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c7d106ef22e2..f36645745489 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -329,6 +329,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ }; BUFFER_FNS(JBD, jbd) -- cgit v1.2.3 From 5cd9d5bb86daf632a40f90e2321ea9379e42f073 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:31 +0100 Subject: quota: Unexport dqblk_v1.h and dqblk_v2.h Unexport header files dqblk_v[12].h since except for quota format ID they don't contain information userspace should be interested in. Move ID definitions to quota.h. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/Kbuild | 2 -- include/linux/dqblk_v1.h | 3 --- include/linux/dqblk_v2.h | 3 --- include/linux/quota.h | 4 ++++ 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 900a787cbae9..39da666067b9 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -56,8 +56,6 @@ header-y += dlm_device.h header-y += dlm_netlink.h header-y += dm-ioctl.h header-y += dn.h -header-y += dqblk_v1.h -header-y += dqblk_v2.h header-y += dqblk_xfs.h header-y += efs_fs_sb.h header-y += elf-fdpic.h diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h index 9cea901f5bba..3713a7232dd8 100644 --- a/include/linux/dqblk_v1.h +++ b/include/linux/dqblk_v1.h @@ -5,9 +5,6 @@ #ifndef _LINUX_DQBLK_V1_H #define _LINUX_DQBLK_V1_H -/* Id of quota format */ -#define QFMT_VFS_OLD 1 - /* Root squash turned on */ #define V1_DQF_RSQUASH 1 diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h index 
ff8af1b4bda7..18000a542677 100644 --- a/include/linux/dqblk_v2.h +++ b/include/linux/dqblk_v2.h @@ -7,9 +7,6 @@ #include -/* Id number of quota format */ -#define QFMT_VFS_V0 2 - /* Numbers of blocks needed for updates */ #define V2_INIT_ALLOC QTREE_INIT_ALLOC #define V2_INIT_REWRITE QTREE_INIT_REWRITE diff --git a/include/linux/quota.h b/include/linux/quota.h index ec82beb10424..d72d5d84fde5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -70,6 +70,10 @@ #define Q_GETQUOTA 0x800007 /* get user quota structure */ #define Q_SETQUOTA 0x800008 /* set user quota structure */ +/* Quota format type IDs */ +#define QFMT_VFS_OLD 1 +#define QFMT_VFS_V0 2 + /* Size of block in which space limits are passed through the quota * interface */ #define QIF_DQBLKSIZE_BITS 10 -- cgit v1.2.3 From 7d9056ba20ebed6e3937a2e23183f6117919cb00 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2008 15:31:32 +0100 Subject: quota: Export dquot_alloc() and dquot_destroy() functions These are default functions for creating and destroying quota structures and they should be used from filesystems. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- include/linux/quotaops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f4913948c305..21b781a3350f 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -31,6 +31,8 @@ int dquot_is_cached(struct super_block *sb, unsigned int id, int type); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv); +struct dquot *dquot_alloc(struct super_block *sb, int type); +void dquot_destroy(struct dquot *dquot); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); int dquot_alloc_inode(const struct inode *inode, qsize_t number); -- cgit v1.2.3 From e06c8227fd94ec181849ba206bf032be31c4295c Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 11 Sep 2008 15:35:47 -0700 Subject: jbd2: Add buffer triggers Filesystems often need to do a compute-intensive operation on some metadata. If this operation is repeated many times, it can be very expensive. It would be much nicer if the operation could be performed once before a buffer goes to disk. This adds triggers to jbd2 buffer heads. Just before writing a metadata buffer to the journal, jbd2 will optionally call a commit trigger associated with the buffer. If the journal is aborted, an abort trigger will be called on any dirty buffers as they are dropped from pending transactions. ocfs2 will use this feature. Initially I tried to come up with a more generic trigger that could be used for non-buffer-related events like transaction completion. It doesn't tie nicely, because the information a buffer trigger needs (specific to a journal_head) isn't the same as what a transaction trigger needs (specific to a transaction_t or perhaps journal_t). So I implemented a buffer set, with the understanding that journal/transaction wide triggers should be implemented separately.
There is only one trigger set allowed per buffer. I can't think of any reason to attach more than one set. Contrast this with a journal or transaction in which multiple places may want to watch the entire transaction separately. The trigger sets are considered static allocation from the jbd2 perspective. ocfs2 will just have one trigger set per block type, setting the same set on every bh of the same type. Signed-off-by: Joel Becker Cc: "Theodore Ts'o" Cc: Signed-off-by: Mark Fasheh --- include/linux/jbd2.h | 31 +++++++++++++++++++++++++++++++ include/linux/journal-head.h | 8 ++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f36645745489..34456476e761 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1008,6 +1008,35 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal); int __jbd2_journal_remove_checkpoint(struct journal_head *); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); + +/* + * Triggers + */ + +struct jbd2_buffer_trigger_type { + /* + * Fired just before a buffer is written to the journal. + * mapped_data is a mapped buffer that is the frozen data for + * commit. + */ + void (*t_commit)(struct jbd2_buffer_trigger_type *type, + struct buffer_head *bh, void *mapped_data, + size_t size); + + /* + * Fired during journal abort for dirty buffers that will not be + * committed. 
+ */ + void (*t_abort)(struct jbd2_buffer_trigger_type *type, + struct buffer_head *bh); +}; + +extern void jbd2_buffer_commit_trigger(struct journal_head *jh, + void *mapped_data, + struct jbd2_buffer_trigger_type *triggers); +extern void jbd2_buffer_abort_trigger(struct journal_head *jh, + struct jbd2_buffer_trigger_type *triggers); + /* Buffer IO */ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, @@ -1046,6 +1075,8 @@ extern int jbd2_journal_extend (handle_t *, int nblocks); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); +void jbd2_journal_set_triggers(struct buffer_head *, + struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index bb70ebb6a2d5..525aac3c97df 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h @@ -12,6 +12,8 @@ typedef unsigned int tid_t; /* Unique transaction ID */ typedef struct transaction_s transaction_t; /* Compound transaction type */ + + struct buffer_head; struct journal_head { @@ -87,6 +89,12 @@ struct journal_head { * [j_list_lock] */ struct journal_head *b_cpnext, *b_cpprev; + + /* Trigger type */ + struct jbd2_buffer_trigger_type *b_triggers; + + /* Trigger type for the committing transaction's frozen data */ + struct jbd2_buffer_trigger_type *b_frozen_triggers; }; #endif /* JOURNAL_HEAD_H_INCLUDED */ -- cgit v1.2.3 From 4c728ef583b3d82266584da5cb068294c09df31e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Dec 2008 21:11:15 +0100 Subject: add a vfs_fsync helper Fsync currently has a fdatawrite/fdatawait pair around 
the method call, and a mutex_lock/unlock of the inode mutex. All callers of fsync have to duplicate this, but we have a few and most of them don't quite get it right. This patch adds a new vfs_fsync that takes care of this. It's a little more complicated than usual as ->fsync might get a NULL file pointer and just a dentry from nfsd, but otherwise gets a file and we want to take the mapping and file operations from it when it is there. Notes on the fsync callers: - ecryptfs wasn't calling filemap_fdatawrite / filemap_fdatawait on the lower file - coda wasn't calling filemap_fdatawrite / filemap_fdatawait on the host file, and returning 0 when ->fsync was missing - shm wasn't calling either filemap_fdatawrite / filemap_fdatawait nor taking i_mutex. Now given that shared memory doesn't have disk backing not doing anything in fsync seems fine and I left it out of the vfs_fsync conversion for now, but in that case we might just not pass it through to the lower file at all but just call the no-op simple_sync_file directly.
[and now actually export vfs_fsync] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2170ee21e18..9ad9eac9eb0c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1827,7 +1827,7 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); -extern long do_fsync(struct file *file, int datasync); +extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); extern void __fsync_super(struct super_block *sb); -- cgit v1.2.3 From 4ae8978cf92a96257cd8998a49e781be83571d64 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 5 Jan 2009 07:19:16 -0500 Subject: inotify: fix type errors in interfaces The problems lie in the types used for some inotify interfaces, both at the kernel level and at the glibc level. This mail addresses the kernel problem. I will follow up with some suggestions for glibc changes. For the sys_inotify_rm_watch() interface, the type of the 'wd' argument is currently 'u32', it should be '__s32' . That is Robert's suggestion, and is consistent with the other declarations of watch descriptors in the kernel source, in particular, the inotify_event structure in include/linux/inotify.h: struct inotify_event { __s32 wd; /* watch descriptor */ __u32 mask; /* watch mask */ __u32 cookie; /* cookie to synchronize two events */ __u32 len; /* length (including nulls) of name */ char name[0]; /* stub for possible name */ }; The patch makes the changes needed for inotify_rm_watch(). 
Signed-off-by: Michael Kerrisk Cc: Robert Love Cc: Vegard Nossum Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 04fb47bfb920..18d0a243a7b3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -549,7 +549,7 @@ asmlinkage long sys_inotify_init(void); asmlinkage long sys_inotify_init1(int flags); asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask); -asmlinkage long sys_inotify_rm_watch(int fd, u32 wd); +asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); -- cgit v1.2.3 From 07f2211e4fbce6990722d78c4f04225da9c0e9cf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Jan 2009 17:14:31 -0700 Subject: dmaengine: remove dependency on async_tx async_tx.ko is a consumer of dma channels. A circular dependency arises if modules in drivers/dma rely on common code in async_tx.ko. It prevents either module from being unloaded. Move dma_wait_for_async_tx and async_tx_run_dependencies to dmaengine.o where they should have been from the beginning.
Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 15 --------------- include/linux/dmaengine.h | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 0f50d4cc4360..1c816775f135 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -60,8 +60,6 @@ enum async_tx_flags { #ifdef CONFIG_DMA_ENGINE void async_tx_issue_pending_all(void); -enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); -void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -77,19 +75,6 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } -static inline enum dma_status -dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) -{ - return DMA_SUCCESS; -} - -static inline void -async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, - struct dma_chan *host_chan) -{ - do { } while (0); -} - static inline struct dma_chan * async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, enum dma_transaction_type tx_type, struct page **dst, int dst_count, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index adb0b084eb5a..e4ec7e7b8056 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -475,11 +475,20 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); +#ifdef CONFIG_DMA_ENGINE +enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +#else +static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + return DMA_SUCCESS; +} +#endif /* --- DMA device --- */ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); +void dma_run_dependencies(struct dma_async_tx_descriptor 
*tx); /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From e8c82c2e23e3527e0c9dc195e432c16784d270fa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 03:05:50 +0100 Subject: mm lockless pagecache barrier fix An XFS workload showed up a bug in the lockless pagecache patch. Basically it would go into an "infinite" loop, although it would sometimes be able to break out of the loop! The reason is a missing compiler barrier in the "increment reference count unless it was zero" case of the lockless pagecache protocol in the gang lookup functions. This would cause the compiler to use a cached value of struct page pointer to retry the operation with, rather than reload it. So the page might have been removed from pagecache and freed (refcount==0) but the lookup would not correctly notice the page is no longer in pagecache, and keep attempting to increment the refcount and failing, until the page gets reallocated for something else. This isn't a data corruption because the condition will be detected if the page has been reallocated. However it can result in a lockup. Linus points out that ACCESS_ONCE is also required in that pointer load, even if its absence is not causing a bug on our particular build. The most general way to solve this is just to put an rcu_dereference in radix_tree_deref_slot.
Assembly of find_get_pages, before: .L220: movq (%rbx), %rax #* ivtmp.1162, tmp82 movq (%rax), %rdi #, prephitmp.1149 .L218: testb $1, %dil #, prephitmp.1149 jne .L217 #, testq %rdi, %rdi # prephitmp.1149 je .L203 #, cmpq $-1, %rdi #, prephitmp.1149 je .L217 #, movl 8(%rdi), %esi # ._count.counter, c testl %esi, %esi # c je .L218 #, after: .L212: movq (%rbx), %rax #* ivtmp.1109, tmp81 movq (%rax), %rdi #, ret testb $1, %dil #, ret jne .L211 #, testq %rdi, %rdi # ret je .L197 #, cmpq $-1, %rdi #, ret je .L211 #, movl 8(%rdi), %esi # ._count.counter, c testl %esi, %esi # c je .L212 #, (notice the obvious infinite loop in the first example, if page->count remains 0) Signed-off-by: Nick Piggin Reviewed-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index a916c6660dfa..355f6e80db0d 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -136,7 +136,7 @@ do { \ */ static inline void *radix_tree_deref_slot(void **pslot) { - void *ret = *pslot; + void *ret = rcu_dereference(*pslot); if (unlikely(radix_tree_is_indirect_ptr(ret))) ret = RADIX_TREE_RETRY; return ret; -- cgit v1.2.3 From 10d3bd09a3c25df114f74f7f86e1b58d070bef32 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 6 Jan 2009 03:04:58 +0000 Subject: dm: consolidate target deregistration error handling Change dm_unregister_target to return void and use BUG() for error reporting. dm_unregister_target can only fail because of programming bug in the target driver. It can't fail because of user's behavior or disk errors. This patch changes unregister_target to return void and use BUG if someone tries to unregister non-registered target or unregister target that is in use. This patch removes code duplication (testing of error codes in all dm targets) and reports bugs in just one place, in dm_unregister_target. 
In some target drivers, these return codes were ignored, which could lead to a situation where bugs could be missed. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index c17fd334e574..89ff2df40240 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -157,8 +157,7 @@ struct dm_target { }; int dm_register_target(struct target_type *t); -int dm_unregister_target(struct target_type *t); - +void dm_unregister_target(struct target_type *t); /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. @@ -276,6 +275,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); *---------------------------------------------------------------*/ #define DM_NAME "device-mapper" +#define DMCRIT(f, arg...) \ + printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) + #define DMERR(f, arg...) \ printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) #define DMERR_LIMIT(f, arg...) \ -- cgit v1.2.3 From 7d76345da6ed3927c9cbf5d3f7a7021e8bba7374 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Tue, 6 Jan 2009 03:05:07 +0000 Subject: dm request: extend target interface This patch adds the following target interfaces for request-based dm. map_rq : for mapping a request rq_end_io : for finishing a request busy : for avoiding performance regression from bio-based dm. Target can tell dm core not to map requests now, and that may help requests in the block layer queue to be bigger by I/O merging. In bio-based dm, this behavior is done by device drivers managing the block layer queue. But in request-based dm, dm core has to do that since dm core manages the block layer queue. 
Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 89ff2df40240..c1ba76c7c0e5 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); */ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, union map_info *map_context); +typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, + union map_info *map_context); /* * Returns: @@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, typedef int (*dm_endio_fn) (struct dm_target *ti, struct bio *bio, int error, union map_info *map_context); +typedef int (*dm_request_endio_fn) (struct dm_target *ti, + struct request *clone, int error, + union map_info *map_context); typedef void (*dm_flush_fn) (struct dm_target *ti); typedef void (*dm_presuspend_fn) (struct dm_target *ti); @@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, struct bio_vec *biovec, int max_size); +/* + * Returns: + * 0: The target can handle the next I/O immediately. + * 1: The target can't handle the next I/O immediately. 
+ */ +typedef int (*dm_busy_fn) (struct dm_target *ti); + void dm_error(const char *message); /* @@ -107,7 +119,9 @@ struct target_type { dm_ctr_fn ctr; dm_dtr_fn dtr; dm_map_fn map; + dm_map_request_fn map_rq; dm_endio_fn end_io; + dm_request_endio_fn rq_end_io; dm_flush_fn flush; dm_presuspend_fn presuspend; dm_postsuspend_fn postsuspend; @@ -117,6 +131,7 @@ struct target_type { dm_message_fn message; dm_ioctl_fn ioctl; dm_merge_fn merge; + dm_busy_fn busy; }; struct io_restrictions { -- cgit v1.2.3 From ab4c1424882be9cd70b89abf2b484add355712fa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 6 Jan 2009 03:05:09 +0000 Subject: dm: support barriers on simple devices Implement barrier support for single device DM devices This patch implements barrier support in DM for the common case of dm linear just remapping a single underlying device. In this case we can safely pass the barrier through because there can be no reordering between devices. NB. Any DM device might cease to support barriers if it gets reconfigured so code must continue to allow for a possible -EOPNOTSUPP on every barrier bio submitted. 
- agk Signed-off-by: Andi Kleen Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index c1ba76c7c0e5..8209e08969f9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -112,7 +112,14 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d); /* * Information about a target type */ + +/* + * Target features + */ +#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001 + struct target_type { + uint64_t features; const char *name; struct module *module; unsigned version[3]; -- cgit v1.2.3 From b3881f74b31b7d47d0f1c4d89ac3e7f0b9c05e3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 22:46:26 -0500 Subject: ext4: Add mount option to set kjournald's I/O priority Signed-off-by: "Theodore Ts'o" Cc: Jens Axboe --- include/linux/ioprio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index f98a656b17e5..76dad4808847 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -86,4 +86,6 @@ static inline int task_nice_ioclass(struct task_struct *task) */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); +extern int set_task_ioprio(struct task_struct *task, int ioprio); + #endif -- cgit v1.2.3 From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 4 Jan 2009 05:18:06 -0800 Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t Impact: use new cpumask API to reduce memory usage This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). 
Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dave Jones Signed-off-by: Ingo Molnar --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 484b3abf61bb..384b38d3e8e2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,8 +80,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* CPUs requiring sw coordination */ - cpumask_t related_cpus; /* CPUs with any coordination */ + cpumask_var_t cpus; /* CPUs requiring sw coordination */ + cpumask_var_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ -- cgit v1.2.3 From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Thu, 16 Oct 2008 19:02:37 +0200 Subject: trivial: fix then -> than typos in comments and documentation - (better, more, bigger ...) then -> (...) than Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- include/linux/mtd/mtd.h | 2 +- include/linux/spi/spi.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eae26bb6430a..64433eb411d7 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -83,7 +83,7 @@ typedef enum { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more then one OOB area at one go, but not write. + * Note, it is allowed to read more than one OOB area at one go, but not write. * The interface assumes that the OOB write requests program only one page's * OOB area. 
*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 82229317753d..68bb1c501d0d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -327,9 +327,9 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @len: size of rx and tx buffers (in bytes) - * @speed_hz: Select a speed other then the device default for this + * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. - * @bits_per_word: select a bits_per_word other then the device default + * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @cs_change: affects chipselect after this transfer completes * @delay_usecs: microseconds to delay after this transfer before -- cgit v1.2.3 From 0211a9c8508b2183e0e539509aad60414f1c3813 Mon Sep 17 00:00:00 2001 From: Frederik Schwarzer Date: Mon, 29 Dec 2008 22:14:56 +0100 Subject: trivial: fix an -> a typos in documentation and comments It is always "an" if there is a vowel _spoken_ (not written). 
So it is: "an hour" (spoken vowel) but "a uniform" (spoken 'j') Signed-off-by: Frederik Schwarzer Signed-off-by: Jiri Kosina --- include/linux/ncp_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 9f2d76347f19..f69e66d151cc 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -87,7 +87,7 @@ struct ncp_objectname_ioctl #define NCP_AUTH_NDS 0x32 int auth_type; size_t object_name_len; - void __user * object_name; /* an userspace data, in most cases user name */ + void __user * object_name; /* a userspace data, in most cases user name */ }; struct ncp_privatedata_ioctl -- cgit v1.2.3 From bd53cbcce501b61921a1af2dddfa87c5b9923dfd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:48 +0100 Subject: ide: add ->cur_port to struct ide_host and use it for serialized hosts * Pass 'ide_hwif_t *' instead of 'ide_hwgroup_t *' to unexpected_intr(). * Cache pointer to the port currently being serviced in ->cur_port and use it instead of hwif->hwgroup on serialized hosts. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index db5ef8ae1ab9..3de13df8bcef 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -852,6 +852,7 @@ struct ide_host { unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; + ide_hwif_t *cur_port; /* for hosts requiring serialization */ }; /* -- cgit v1.2.3 From ae86afaee6a1c77c7a06d81dcc3bf872204d3bec Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:48 +0100 Subject: ide: use per-port IRQ handlers Use hwif instead of hwgroup as {request,free}_irq()'s cookie, teach ide_intr() to return early for non-active serialized ports, modify unexpected_intr() accordingly and then use per-port IRQ handlers instead of per-hwgroup ones. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3de13df8bcef..f5382ad0bd4c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1274,14 +1274,14 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); -static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup) +static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif) { if (hwgroup->busy) return 1; hwgroup->busy = 1; /* for atari only */ - ide_get_lock(ide_intr, hwgroup); + ide_get_lock(ide_intr, hwif); return 0; } -- cgit v1.2.3 From efe0397eef544ac4bcca23d39aa8d5db154952e0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:49 +0100 Subject: ide: remove hwgroup->hwif and {drive,hwif}->next * Add 'int port_count' field to ide_hwgroup_t to keep the track of the number of ports in the hwgroup. 
Then update init_irq() and ide_remove_port_from_hwgroup() to use it. * Remove no longer needed hwgroup->hwif, {drive,hwif}->next, ide_add_drive_to_hwgroup() and ide_remove_drive_from_hwgroup() (hwgroup->drive now only denotes the currently active device in the hwgroup). * Update locking documentation in <linux/ide.h>. While at it: * Rename ->drive field in ide_hwgroup_t to ->cur_dev. * Use __func__ in ide_timer_expiry(). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f5382ad0bd4c..8b74ccdd221c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -588,7 +588,6 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ struct request *rq; /* current request */ - struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS @@ -750,7 +749,6 @@ struct ide_dma_ops { struct ide_host; typedef struct hwif_s { - struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ struct hwif_s *mate; /* other hwif from same PCI chip */ struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ @@ -874,9 +872,7 @@ typedef struct hwgroup_s { unsigned int polling : 1; /* current drive */ - ide_drive_t *drive; - /* ptr to current hwif in linked-list */ - ide_hwif_t *hwif; + ide_drive_t *cur_dev; /* current request */ struct request *rq; @@ -892,6 +888,8 @@ typedef struct hwgroup_s { int req_gen_timer; spinlock_t lock; + + int port_count; } ide_hwgroup_t; typedef struct ide_driver_s ide_driver_t; @@ -1622,12 +1620,7 @@ extern struct mutex ide_cfg_mtx; /* * Structure locking: * - * ide_cfg_mtx and hwgroup->lock together protect changes to - * ide_hwif_t->next - * ide_drive_t->next - * * ide_hwgroup_t->busy: hwgroup->lock - * 
ide_hwgroup_t->hwif: hwgroup->lock * ide_hwif_t->{hwgroup,mate}: constant, no locking * ide_drive_t->hwif: constant, no locking */ -- cgit v1.2.3 From 5b31f855f10d0053e738baa6d91fb6a3fad35119 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:49 +0100 Subject: ide: use lock bitops for ports serialization (v2) * Add ->host_busy field to struct ide_host and use its first bit together with lock bitops to provide new ports serialization method. * Convert core IDE code to use new ide_[un]lock_host() helpers. This removes the need for taking hwgroup->lock if host is already busy on serialized hosts and makes it possible to merge ide_hwgroup_t into ide_hwif_t (done in the later patch). * Remove no longer needed ide_hwgroup_t.busy and ide_[un]lock_hwgroup(). * Update do_ide_request() documentation. v2: * ide_release_lock() should be called inside IDE_HFLAG_SERIALIZE check. * Add ide_hwif_t.busy flag and ide_[un]lock_port() for serializing devices on a port. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 8b74ccdd221c..00df155b5a02 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -828,6 +828,7 @@ typedef struct hwif_s { unsigned present : 1; /* this interface exists */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ + unsigned busy : 1; /* serializes devices on a port */ struct device gendev; struct device *portdev; @@ -851,8 +852,13 @@ struct ide_host { unsigned long host_flags; void *host_priv; ide_hwif_t *cur_port; /* for hosts requiring serialization */ + + /* used for hosts requiring serialization */ + volatile long host_busy; }; +#define IDE_HOST_BUSY 0 + /* * internal ide interrupt handler type */ @@ -866,8 +872,6 @@ typedef struct hwgroup_s { /* irq handler, if active */ ide_startstop_t (*handler)(ide_drive_t 
*); - /* BOOL: protects all fields below */ - volatile int busy; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; @@ -1271,26 +1275,6 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); - -static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif) -{ - if (hwgroup->busy) - return 1; - - hwgroup->busy = 1; - /* for atari only */ - ide_get_lock(ide_intr, hwif); - - return 0; -} - -static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup) -{ - /* for atari only */ - ide_release_lock(); - hwgroup->busy = 0; -} - extern void do_ide_request(struct request_queue *); void ide_init_disk(struct gendisk *, ide_drive_t *); @@ -1617,13 +1601,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) extern spinlock_t ide_lock; extern struct mutex ide_cfg_mtx; -/* - * Structure locking: - * - * ide_hwgroup_t->busy: hwgroup->lock - * ide_hwif_t->{hwgroup,mate}: constant, no locking - * ide_drive_t->hwif: constant, no locking - */ #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) -- cgit v1.2.3 From b65fac32cfe3b2f98cd472fef400bd1c1340de23 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:50 +0100 Subject: ide: merge ide_hwgroup_t with ide_hwif_t (v2) * Merge ide_hwgroup_t with ide_hwif_t. * Cleanup init_irq() accordingly, then remove no longer needed ide_remove_port_from_hwgroup() and ide_ports[]. * Remove now unused HWGROUP() macro. While at it: * ide_dump_ata_error() fixups v2: * Fix ->quirk_list check in do_ide_request() (s/hwif->cur_dev/prev_port->cur_dev). 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 00df155b5a02..f27f130ba000 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -42,7 +42,6 @@ typedef unsigned char byte; /* used everywhere */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ #define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) -#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) /* * Definitions for accessing IDE controller registers @@ -750,7 +749,6 @@ struct ide_host; typedef struct hwif_s { struct hwif_s *mate; /* other hwif from same PCI chip */ - struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_host *host; @@ -840,6 +838,30 @@ typedef struct hwif_s { #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_hwif_link *acpidata; #endif + + /* IRQ handler, if active */ + ide_startstop_t (*handler)(ide_drive_t *); + + /* BOOL: polling active & poll_timeout field valid */ + unsigned int polling : 1; + + /* current drive */ + ide_drive_t *cur_dev; + + /* current request */ + struct request *rq; + + /* failsafe timer */ + struct timer_list timer; + /* timeout value during long polls */ + unsigned long poll_timeout; + /* queried upon timeouts */ + int (*expiry)(ide_drive_t *); + + int req_gen; + int req_gen_timer; + + spinlock_t lock; } ____cacheline_internodealigned_in_smp ide_hwif_t; #define MAX_HOST_PORTS 4 @@ -868,34 +890,6 @@ typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. 
*/ typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); -typedef struct hwgroup_s { - /* irq handler, if active */ - ide_startstop_t (*handler)(ide_drive_t *); - - /* BOOL: polling active & poll_timeout field valid */ - unsigned int polling : 1; - - /* current drive */ - ide_drive_t *cur_dev; - - /* current request */ - struct request *rq; - - /* failsafe timer */ - struct timer_list timer; - /* timeout value during long polls */ - unsigned long poll_timeout; - /* queried upon timeouts */ - int (*expiry)(ide_drive_t *); - - int req_gen; - int req_gen_timer; - - spinlock_t lock; - - int port_count; -} ide_hwgroup_t; - typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; @@ -1512,7 +1506,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_remove_port_from_hwgroup(ide_hwif_t *); void ide_unregister(ide_hwif_t *); void ide_register_region(struct gendisk *); -- cgit v1.2.3 From b40d1b88f1001f0224c63fa2c008914514bcef33 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:51 +0100 Subject: ide: move ide_init_port_data() and friends to ide-probe.c * Move IDE_DEFAULT_MAX_FAILURES to <linux/ide.h>. * Move ide_cfg_mtx, ide_hwif_to_major[], ide_port_init_devices_data(), ide_init_port_data(), ide_init_port_hw() and ide_unregister() to ide-probe.c from ide.c. * Make ide_unregister(), ide_init_port_data(), ide_init_port_hw() and ide_cfg_mtx static. While at it: * Remove stale ide_init_port_data() documentation and ide_lock extern. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f27f130ba000..ee2f461882ad 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -37,6 +37,7 @@ typedef unsigned char byte; /* used everywhere */ /* * Probably not wise to fiddle with these */ +#define IDE_DEFAULT_MAX_FAILURES 1 #define ERROR_MAX 8 /* Max read/write errors per sector */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ @@ -184,9 +185,6 @@ typedef struct hw_regs_s { unsigned long config; } hw_regs_t; -void ide_init_port_data(struct hwif_s *, unsigned int); -void ide_init_port_hw(struct hwif_s *, hw_regs_t *); - static inline void ide_std_init_ports(hw_regs_t *hw, unsigned long io_addr, unsigned long ctl_addr) @@ -1506,8 +1504,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_unregister(ide_hwif_t *); - void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); @@ -1592,9 +1588,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) ide_set_pio(drive, 255); } -extern spinlock_t ide_lock; -extern struct mutex ide_cfg_mtx; - #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) char *ide_media_string(ide_drive_t *); -- cgit v1.2.3 From 898ec223fea2a2df88035e58dbf50f493577e225 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove HWIF() macro Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ee2f461882ad..58b9c99482cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h 
@@ -42,8 +42,6 @@ typedef unsigned char byte; /* used everywhere */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ -#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) - /* * Definitions for accessing IDE controller registers */ -- cgit v1.2.3 From 54cc1428cfa619e16d75baae8cb041a2eff015f0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove local_irq_set() macro Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 58b9c99482cd..82d500c5a847 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1586,8 +1586,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) ide_set_pio(drive, 255); } -#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) - char *ide_media_string(ide_drive_t *); extern struct device_attribute ide_dev_attrs[]; -- cgit v1.2.3 From c0ae50234771684ae0cbac5dfb70e1a09c22aa89 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:52 +0100 Subject: ide: remove ide_pci_enablebit_t typedef Remove needless parens while at it. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 82d500c5a847..cca6cfb299bc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1300,11 +1300,11 @@ static inline int ide_hwif_setup_dma(ide_hwif_t *hwif, } #endif -typedef struct ide_pci_enablebit_s { +struct ide_pci_enablebit { u8 reg; /* byte pci reg holding the enable-bit */ u8 mask; /* mask to isolate the enable-bit */ u8 val; /* value of masked reg when "enabled" */ -} ide_pci_enablebit_t; +}; enum { /* Uses ISA control ports not PCI ones. */ @@ -1393,7 +1393,8 @@ struct ide_port_info { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; - ide_pci_enablebit_t enablebits[2]; + struct ide_pci_enablebit enablebits[2]; + hwif_chipset_t chipset; u16 max_sectors; /* if < than the default one */ -- cgit v1.2.3 From 9892ec5497af1ec38c46974b5a370ba11c636b19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:53 +0100 Subject: ide: remove 'byte' typedef Just use u8 instead, also s/__u8/u8/ in ide-cd.h while at it. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index cca6cfb299bc..545a67f1f6b5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,8 +32,6 @@ # define SUPPORT_VLB_SYNC 1 #endif -typedef unsigned char byte; /* used everywhere */ - /* * Probably not wise to fiddle with these */ @@ -1161,7 +1159,7 @@ void ide_pad_transfer(ide_drive_t *, int, int); ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); -ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); +ide_startstop_t ide_error(ide_drive_t *, const char *, u8); void ide_fix_driveid(u16 *); -- cgit v1.2.3 From 7f3c868ba78e486bd9d7569f884dd46d8f59bb18 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:53 +0100 Subject: ide: remove ide_driver_t typedef While at it: - s/struct ide_driver_s/struct ide_driver/ - use to_ide_driver() macro in ide-proc.c Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 545a67f1f6b5..fcbcfa2cbe75 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -437,7 +437,7 @@ struct ide_atapi_pc { }; struct ide_devset; -struct ide_driver_s; +struct ide_driver; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -884,8 +884,6 @@ typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. 
*/ typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); -typedef struct ide_driver_s ide_driver_t; - extern struct mutex ide_setting_mtx; /* @@ -1011,8 +1009,8 @@ void ide_proc_register_port(ide_hwif_t *); void ide_proc_port_register_devices(ide_hwif_t *); void ide_proc_unregister_device(ide_drive_t *); void ide_proc_unregister_port(ide_hwif_t *); -void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); -void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); +void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); +void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -1039,8 +1037,10 @@ static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; } static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } -static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } -static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_proc_register_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } +static inline void ide_proc_unregister_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -1109,7 +1109,7 @@ void ide_check_pm_state(ide_drive_t *, struct request *); * The gendriver.owner field should be set to the module owner of this driver. 
* The gendriver.name field should be set to the name of this driver */ -struct ide_driver_s { +struct ide_driver { const char *version; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); @@ -1125,7 +1125,7 @@ struct ide_driver_s { #endif }; -#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver) +#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver) int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); -- cgit v1.2.3 From 627e05daa10896a8f012fa78e8434c07e9e55ea7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:54 +0100 Subject: ide: remove ->error method from struct ide_driver * Remove (now superfluous) ->error method from struct ide_driver. * Unexport __ide_error() and make it static. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fcbcfa2cbe75..9f6fe1fe7a6c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1113,7 +1113,6 @@ struct ide_driver { const char *version; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); - ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -1157,8 +1156,6 @@ void ide_execute_pkt_cmd(ide_drive_t *); void ide_pad_transfer(ide_drive_t *, int, int); -ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); - ide_startstop_t ide_error(ide_drive_t *, const char *, u8); void ide_fix_driveid(u16 *); -- cgit v1.2.3 From 5e7f3a46690f7f6c9f2781c700ab4370874aa0e8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:56 +0100 Subject: ide: dynamic allocation of device structures Allocate device structures 
dynamically instead of having them embedded in ide_hwif_t: * Remove needless zeroing of port structure from ide_init_port_data(). * Add ide_hwif_t.devices[MAX_DRIVES] (table of pointers to the devices). * Add ide_port_{alloc,free}_devices() helpers and use them respectively in ide_{host,free}_alloc(). * Convert all users of ->drives[] to use ->devices[] instead. While at it: * Use drive->dn for the slave device check in scc_pata.c. As a nice side-effect this patch cuts ~1kB (x86-32) from the resulting code size: text data bss dec hex filename 53963 1244 237 55444 d894 drivers/ide/ide-core.o.before 52981 1244 237 54462 d4be drivers/ide/ide-core.o.after Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9f6fe1fe7a6c..f00086b10be3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -753,7 +753,7 @@ typedef struct hwif_s { unsigned long sata_scr[SATA_NR_PORTS]; - ide_drive_t drives[MAX_DRIVES]; /* drive info */ + ide_drive_t *devices[MAX_DRIVES]; u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ @@ -1600,7 +1600,7 @@ static inline int hwif_to_node(ide_hwif_t *hwif) static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) { - ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; + ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1]; return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; } -- cgit v1.2.3 From 2bd24a1cfc99d242c2cff9a6b74ca49fcaac3fb6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:56 +0100 Subject: ide: add port and host iterators Add ide_port_for_each_dev() / ide_host_for_each_port() iterators and update IDE code to use them. 
While at it: - s/unit/i/ variable in ide_port_wait_ready(), ide_probe_port(), ide_port_tune_devices(), ide_port_init_devices_data(), do_reset1(), ide_acpi_set_state() and scc_dma_end() - s/d/i/ variable in ide_proc_port_register_devices() There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f00086b10be3..4cecd923fc79 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -753,7 +753,7 @@ typedef struct hwif_s { unsigned long sata_scr[SATA_NR_PORTS]; - ide_drive_t *devices[MAX_DRIVES]; + ide_drive_t *devices[MAX_DRIVES + 1]; u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ @@ -861,7 +861,7 @@ typedef struct hwif_s { #define MAX_HOST_PORTS 4 struct ide_host { - ide_hwif_t *ports[MAX_HOST_PORTS]; + ide_hwif_t *ports[MAX_HOST_PORTS + 1]; unsigned int n_ports; struct device *dev[2]; unsigned int (*init_chipset)(struct pci_dev *); @@ -1604,4 +1604,11 @@ static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; } + +#define ide_port_for_each_dev(i, dev, port) \ + for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++) + +#define ide_host_for_each_port(i, port, host) \ + for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ -- cgit v1.2.3 From d6251d4488a361c93da2398818e1ec69cffb6073 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 6 Jan 2009 17:20:58 +0100 Subject: ide-cd: convert to ide-atapi facilities ... and remove no longer needed cdrom_start_packet_command and cdrom_transfer_packet_command. Tested lightly with ide-cd and ide-floppy. 
Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4cecd923fc79..13deba5e0157 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -654,6 +654,8 @@ struct ide_drive_s { int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, unsigned int, int); + ide_startstop_t (*irq_handler)(struct ide_drive_s *); + unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; -- cgit v1.2.3 From 906ef986a71d541a726550fa40dcbc5c356f810e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:59 +0100 Subject: ide: struct ide_atapi_pc - remove unused fields and update documentation Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 13deba5e0157..a7dbfd857115 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -426,13 +426,9 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - /* idescsi only for now */ struct scatterlist *sg; unsigned int sg_cnt; - struct scsi_cmnd *scsi_cmd; - void (*done) (struct scsi_cmnd *); - unsigned long timeout; }; -- cgit v1.2.3 From 94c96445f32c16cfdc398b20b7e78945ab7e35f9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Jan 2009 17:20:59 +0100 Subject: ide: remove unused ide_hwif_t.sg_mapped field Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a7dbfd857115..ebc22a836520 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -817,7 +817,6 @@ typedef struct hwif_s { unsigned extra_ports; /* number of extra dma ports */ unsigned present : 1; /* this interface exists */ - unsigned sg_mapped : 1; /* 
sg_table and sg_nents are ready */ unsigned busy : 1; /* serializes devices on a port */ struct device gendev; -- cgit v1.2.3 From 391ad1908a9c13d457ea12ce1508d6b8a7ba72ad Mon Sep 17 00:00:00 2001 From: Shane McDonald Date: Tue, 6 Jan 2009 17:21:01 +0100 Subject: Resurrect IT8172 IDE controller driver Support for the IT8172 IDE controller was removed from the kernel sometime after 2.6.18. Support for the only boards that used the IT8172 was removed from the kernel after 2.6.18, as they had never compiled since 2.6.0. However, there are a couple of platforms that use this chip: the PMC-Sierra Xiao Hu thin-client computer, which is no longer in production, and the Linksys NSS4000 Network Attached Storage box, which is based on the Xiao Hu board. I am attempting to add support for the Xiao Hu to the kernel, and this IT8172 IDE controller is the first bit of code in this effort. This patch resurrects the IT8172 IDE controller code. I began with the 2.6.18 version of the it8172.c file, and have moved it forward so that it works with the latest version of the kernel. I have run this driver on a PMC-Sierra Xiao Hu board with the 2.6.28 kernel, and I have had no problems with it in my configuration. The attached patch applies cleanly against 2.6.28. 
Signed-off-by: Shane McDonald Acked-by: Sergei Shtylyov Cc: alan@lxorguk.ukuu.org.uk [bart: s/HWIF(drive)/drive->hwif/] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 218c73b1e6d4..d543365518ab 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1658,6 +1658,7 @@ #define PCI_VENDOR_ID_ROCKWELL 0x127A #define PCI_VENDOR_ID_ITE 0x1283 +#define PCI_DEVICE_ID_ITE_8172 0x8172 #define PCI_DEVICE_ID_ITE_8211 0x8211 #define PCI_DEVICE_ID_ITE_8212 0x8212 #define PCI_DEVICE_ID_ITE_8213 0x8213 -- cgit v1.2.3 From 592b5315219881c6c0af4785f96456ad2043193a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 6 Jan 2009 17:21:02 +0100 Subject: ide: move read_sff_dma_status() method to 'struct ide_dma_ops' Move apparently misplaced read_sff_dma_status() method from 'struct ide_tp_ops' to 'struct ide_dma_ops', renaming it to dma_sff_read_status() and making only required for SFF-8038i compatible IDE controller drivers (greatly cutting down the number of initializers) as its only user (outside ide-dma-sff.c and such drivers) appears to be ide_pci_check_simplex() which is only called for such controllers... 
Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ebc22a836520..3644f6323384 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -674,7 +674,6 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); - u8 (*read_sff_dma_status)(struct hwif_s *); void (*set_irq)(struct hwif_s *, int); @@ -735,6 +734,11 @@ struct ide_dma_ops { int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); void (*dma_timeout)(struct ide_drive_s *); + /* + * The following method is optional and only required to be + * implemented for the SFF-8038i compatible controllers. + */ + u8 (*dma_sff_read_status)(struct hwif_s *); }; struct ide_host; @@ -1177,7 +1181,6 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); -u8 ide_read_sff_dma_status(ide_hwif_t *); void ide_set_irq(ide_hwif_t *, int); @@ -1458,6 +1461,7 @@ void ide_dma_exec_cmd(ide_drive_t *, u8); extern void ide_dma_start(ide_drive_t *); int ide_dma_end(ide_drive_t *); int ide_dma_test_irq(ide_drive_t *); +u8 ide_dma_sff_read_status(ide_hwif_t *); extern const struct ide_dma_ops sff_dma_ops; #else static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } -- cgit v1.2.3 From 548eaca46b3cf4419b6c2be839a106d8641ffb70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Oct 2008 17:48:43 -0400 Subject: nfsd: document new filehandle fsid types Descriptions taken from mountd code (in nfs-utils/utils/mountd/cache.c). Signed-off-by: J. 
Bruce Fields --- include/linux/nfsd/nfsfh.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index d1941cb965e9..b2e093870bc6 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -68,6 +68,10 @@ struct nfs_fhbase_old { * 1 - 4 byte user specified identifier * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. * This is (will be) passed to, and set by, the underlying filesystem if it supports -- cgit v1.2.3 From c9233eb7b0b11ef176d4bf68da2ce85464b6ec39 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 20 Oct 2008 11:51:57 -0400 Subject: sunrpc: add sv_maxconn field to svc_serv (try #3) svc_check_conn_limits() attempts to prevent denial of service attacks by having the service close old connections once it reaches a threshold. This threshold is based on the number of threads in the service: (serv->sv_nrthreads + 3) * 20 Once we reach this, we drop the oldest connections and a printk pops to warn the admin that they should increase the number of threads. Increasing the number of threads isn't an option however for services like lockd. We don't want to eliminate this check entirely for such services but we need some way to increase this limit. This patch adds a sv_maxconn field to the svc_serv struct. When it's set to 0, we use the current method to calculate the max number of connections. RPC services can then set this on an as-needed basis. Signed-off-by: Jeff Layton Acked-by: Neil Brown Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3afe7fb403b2..3435d24bfe55 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -58,10 +58,13 @@ struct svc_serv { struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; unsigned int sv_nrthreads; /* # of server threads */ + unsigned int sv_maxconn; /* max connections allowed or + * '0' causing max to be based + * on number of threads. */ + unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ - struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ -- cgit v1.2.3 From 7538ce1eb656a1477bedd5b1c202226e7abf5e7b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:45 -0500 Subject: NLM: Use modern style for pointer fields in nlm_host Clean up: I'm about to add another "char *" field to the nlm_host structure. The h_name field, for example, uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix pointer fields to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 23da3fa69efa..3dbdd353156c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,8 +43,8 @@ struct nlm_host { struct sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; struct sockaddr_storage h_srcaddr; /* our address (optional) */ - struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ - char * h_name; /* remote hostname */ + struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ + char *h_name; /* remote hostname */ u32 h_version; /* interface version */ unsigned short h_proto; /* transport proto */ unsigned short h_reclaiming : 1, @@ -64,7 +64,7 @@ struct nlm_host { spinlock_t h_lock; struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ - struct nsm_handle * h_nsmhandle; /* NSM status handle */ + struct nsm_handle *h_nsmhandle; /* NSM status handle */ char h_addrbuf[48], /* address eyecatchers */ h_srcaddrbuf[48]; -- cgit v1.2.3 From 1df40b609ad5a622904eb652109c287fe9c93ec5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:19:53 -0500 Subject: NLM: Remove address eye-catcher buffers from nlm_host The h_name field in struct nlm_host is just a copy of h_nsmhandle->sm_name. Likewise, the contents of the h_addrbuf field should be identical to the sm_addrbuf field. The h_srcaddrbuf field is used only in one place for debugging. We can live without this until we get %pI formatting for printk(). Currently these buffers are 48 bytes, but we need to support scope IDs in IPv6 presentation addresses, which means making the buffers even larger. Instead, let's find ways to eliminate them to save space. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/lockd/lockd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 3dbdd353156c..dae22cb4c38d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -65,9 +65,7 @@ struct nlm_host { struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle *h_nsmhandle; /* NSM status handle */ - - char h_addrbuf[48], /* address eyecatchers */ - h_srcaddrbuf[48]; + char *h_addrbuf; /* address eyecatcher */ }; struct nsm_handle { -- cgit v1.2.3 From bc995801a09d1fead0bec1356bfd836911c8eed7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:08 -0500 Subject: NLM: Support IPv6 scope IDs in nlm_display_address() Scope ID support is needed since the kernel's NSM implementation is about to use these displayed addresses as a mon_name in some cases. When nsm_use_hostnames is zero, without scope ID support NSM will fail to handle peers that contact us via a link-local address. Link-local addresses do not work without an interface ID, which is stored in the sockaddr's sin6_scope_id field. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dae22cb4c38d..80a0a2cff2b8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,14 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ }; +/* + * The largest string sm_addrbuf should hold is a full-size IPv6 address + * (no "::" anywhere) with a scope ID. The buffer size is computed to + * hold eight groups of colon-separated four-hex-digit numbers, a + * percent sign, a scope id (at most 32 bits, in decimal), and NUL. 
+ */ +#define NSM_ADDRBUF ((8 * 4 + 7) + (1 + 10) + 1) + struct nsm_handle { struct list_head sm_link; atomic_t sm_count; @@ -76,7 +84,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ - char sm_addrbuf[48]; /* address eyecatcher */ + char sm_addrbuf[NSM_ADDRBUF]; }; /* -- cgit v1.2.3 From f47534f7f0ac7727e05ec4274b764b181df2cf7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:38 -0500 Subject: NSM: Use modern style for sm_name field in nsm_handle Clean up: I'm about to add another "char *" field to the nsm_handle structure. The sm_name field uses an older style of declaring a "char *" field. If I match that style for the new field, checkpatch.pl will complain. So, fix the sm_name field to use the new style. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80a0a2cff2b8..54dbb458e73c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,7 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; - char * sm_name; + char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; unsigned int sm_monitored : 1, -- cgit v1.2.3 From 29ed1407ed81086b778ebf12145b048ac3f7e10e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:20:46 -0500 Subject: NSM: Support IPv6 version of mon_name The "mon_name" argument of the NSMPROC_MON and NSMPROC_UNMON upcalls is a string that contains the hostname or IP address of the remote peer to be notified when this host has rebooted. The sm-notify command uses this identifier to contact the peer when we reboot, so it must be either a well-qualified DNS hostname or a presentation format IP address string. 
When the "nsm_use_hostnames" sysctl is set to zero, the kernel's NSM provides a presentation format IP address in the "mon_name" argument. Otherwise, the "caller_name" argument from NLM requests is used, which is usually just the DNS hostname of the peer. To support IPv6 addresses for the mon_name argument, we use the nsm_handle's address eye-catcher, which already contains an appropriate presentation format address string. Using the eye-catcher string obviates the need to use a large buffer on the stack to form the presentation address string for the upcall. This patch also addresses a subtle bug. An NSMPROC_MON request and the subsequent NSMPROC_UNMON request for the same peer are required to use the same value for the "mon_name" argument. Otherwise, rpc.statd's NSMPROC_UNMON processing cannot locate the database entry for that peer and remove it. If the setting of nsm_use_hostnames is changed between the time the kernel sends an NSMPROC_MON request and the time it sends the NSMPROC_UNMON request for the same peer, the "mon_name" argument for these two requests may not be the same. This is because the value of "mon_name" is currently chosen at the moment the call is made based on the setting of nsm_use_hostnames. To ensure both requests pass identical contents in the "mon_name" argument, we now select which string to use for the argument in the nsm_monitor() function. A pointer to this string is saved in the nsm_handle so it can be used for a subsequent NSMPROC_UNMON upcall. NB: There are other potential problems, such as how nlm_host_rebooted() might behave if nsm_use_hostnames were changed while hosts are still being monitored. This patch does not attempt to address those problems. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 54dbb458e73c..d3c7247d23e8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -79,6 +79,7 @@ struct nlm_host { struct nsm_handle { struct list_head sm_link; atomic_t sm_count; + char *sm_mon_name; char *sm_name; struct sockaddr_storage sm_addr; size_t sm_addrlen; -- cgit v1.2.3 From 1e49323c4ab044d05bbc68cf13cadcbd4372468c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:24 -0500 Subject: NLM: Move the public declaration of nsm_monitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h with other NSM public function (nsm_release, eg) and global variable declarations. Add a documenting comment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 4 ++++ include/linux/lockd/sm_inter.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d3c7247d23e8..f15a4f5ccbfb 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -242,6 +242,10 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); void nsm_release(struct nsm_handle *); +/* + * Host monitoring + */ +int nsm_monitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 5a5448bdb17d..546b6102b0d7 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_monitor(struct nlm_host *); int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; -- cgit v1.2.3 From c8c23c423dec49cb439697d3dc714e1500ff1610 Mon Sep 17 00:00:00 2001 
From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:31 -0500 Subject: NSM: Release nsmhandle in nlm_destroy_host The nsm_handle's reference count is bumped in nlm_lookup_host(). It should be decremented in nlm_destroy_host() to make it easier to see the balance of these two operations. Move the nsm_release() call to fs/lockd/host.c. The h_nsmhandle pointer is set in nlm_lookup_host(), and never cleared. The nlm_destroy_host() function is never called for the same nlm_host twice, so h_nsmhandle won't ever be NULL when nsm_unmonitor() is called. All references to the nlm_host are gone before it is freed. We can skip making h_nsmhandle NULL just before the nlm_host is deallocated. It's also likely we can remove the h_nsmhandle NULL check in nlmsvc_is_client() as well, but we can do that later when rearchitecting the nlm_host cache. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f15a4f5ccbfb..30a6a9c1ce42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -240,7 +240,6 @@ void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, unsigned int, u32); -void nsm_release(struct nsm_handle *); /* * Host monitoring -- cgit v1.2.3 From 356c3eb466fd1a12afd6448d90fba3922836e5f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Dec 2008 14:21:38 -0500 Subject: NLM: Move the public declaration of nsm_unmonitor() to lockd.h Clean up. Make the nlm_host argument "const," and move the public declaration to lockd.h. Add a documenting comment. Bruce observed that nsm_unmonitor()'s only caller doesn't care about its return code, so make nsm_unmonitor() return void. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 30a6a9c1ce42..38344bfb814a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -245,6 +245,7 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, * Host monitoring */ int nsm_monitor(const struct nlm_host *host); +void nsm_unmonitor(const struct nlm_host *host); /* * This is used in garbage collection and resource reclaim diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 546b6102b0d7..896a5e303323 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -41,7 +41,6 @@ struct nsm_res { u32 state; }; -int nsm_unmonitor(struct nlm_host *); extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.3 From 9c1bfd037f7ff8badaecb47418f109148d88bf45 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:01:59 -0500 Subject: NSM: Move NSM-related XDR data structures to lockd's xdr.h Clean up: NSM's XDR data structures are used only in fs/lockd/mon.c, so move them there. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/sm_inter.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 896a5e303323..dd9d8a5bb316 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -21,26 +21,6 @@ #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -/* - * Arguments for all calls to statd - */ -struct nsm_args { - __be32 addr; /* remote address */ - u32 prog; /* RPC callback info */ - u32 vers; - u32 proc; - - char * mon_name; -}; - -/* - * Result returned by statd - */ -struct nsm_res { - u32 status; - u32 state; -}; - extern int nsm_local_state; #endif /* LINUX_LOCKD_SM_INTER_H */ -- cgit v1.2.3 From 36e8e668d3e6a61848a8921ddeb663b417299fa5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:07 -0500 Subject: NSM: Move NSM program and procedure numbers to fs/lockd/mon.c Clean up: Move the RPC program and procedure numbers for NSM into the one source file that needs them: fs/lockd/mon.c. And, as with NLM, NFS, and rpcbind calls, use NSMPROC_FOO instead of SM_FOO for NSM procedure numbers. Finally, make a couple of comments more precise: what is referred to here as SM_NOTIFY is really the NLM (lockd) NLMPROC_SM_NOTIFY downcall, not NSMPROC_NOTIFY. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/sm_inter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index dd9d8a5bb316..116bf38535a0 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -9,15 +9,6 @@ #ifndef LINUX_LOCKD_SM_INTER_H #define LINUX_LOCKD_SM_INTER_H -#define SM_PROGRAM 100024 -#define SM_VERSION 1 -#define SM_STAT 1 -#define SM_MON 2 -#define SM_UNMON 3 -#define SM_UNMON_ALL 4 -#define SM_SIMU_CRASH 5 -#define SM_NOTIFY 6 - #define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 -- cgit v1.2.3 From 67c6d107a689243979a2b5f15244b5261634a924 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:02:45 -0500 Subject: NSM: Move nsm_find() to fs/lockd/mon.c The nsm_find() function sets up fresh nsm_handle entries. This is where we will store the "priv" cookie used to lookup nsm_handles during reboot recovery. The cookie will be constructed when nsm_find() creates a new nsm_handle. As much as possible, I would like to keep everything that handles a "priv" cookie in fs/lockd/mon.c so that all the smarts are in one source file. That organization should make it pretty simple to see how all this works. To me, it makes more sense than the current arrangement to keep nsm_find() with nsm_monitor() and nsm_unmonitor(). So, start reorganizing by moving nsm_find() into fs/lockd/mon.c. The nsm_release() function comes along too, since it shares the nsm_lock global variable. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 38344bfb814a..8d715363c6ac 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,6 +247,12 @@ extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); +struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, + const char *hostname, + const size_t hostname_len, + const int create); +void nsm_release(struct nsm_handle *nsm); + /* * This is used in garbage collection and resource reclaim * A return value != 0 means destroy the lock/block/share -- cgit v1.2.3 From 7e44d3bea21fbb9494930d1cd35ca92a9a4a3279 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:16 -0500 Subject: NSM: Generate NSMPROC_MON's "priv" argument when nsm_handle is created Introduce a new data type, used by both the in-kernel NLM and NSM implementations, that is used to manage the opaque "priv" argument for the NSMPROC_MON and NLMPROC_SM_NOTIFY calls. Construct the "priv" cookie when the nsm_handle is created. The nsm_init_private() function may look a little strange, but it is roughly equivalent to how the XDR encoder formed the "priv" argument. It's going to go away soon. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 1 - include/linux/lockd/xdr.h | 6 ++++++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8d715363c6ac..194fa8a66398 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -85,6 +85,7 @@ struct nsm_handle { size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ + struct nsm_private sm_priv; char sm_addrbuf[NSM_ADDRBUF]; }; diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 116bf38535a0..5cef5a79dd94 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -10,7 +10,6 @@ #define LINUX_LOCKD_SM_INTER_H #define SM_MAXSTRLEN 1024 -#define SM_PRIV_SIZE 16 extern int nsm_local_state; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d6b3a802c046..6b5199263858 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,12 @@ #include #include +#define SM_PRIV_SIZE 16 + +struct nsm_private { + unsigned char data[SM_PRIV_SIZE]; +}; + struct svc_rqst; #define NLM_MAXCOOKIELEN 32 -- cgit v1.2.3 From 7fefc9cb9d5f129c238d93166f705c96ca2e7e51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:31 -0500 Subject: NLM: Change nlm_host_rebooted() to take a single nlm_reboot argument Pass the nlm_reboot data structure directly from the NLMPROC_SM_NOTIFY XDR decoders to nlm_host_rebooted(). This eliminates some packing and unpacking of the NLMPROC_SM_NOTIFY results, and prepares for passing these results, including the "priv" cookie, directly to a lookup routine in fs/lockd/mon.c. This patch changes code organization but should not cause any behavioral change. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 194fa8a66398..2a3533ea38dd 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -239,8 +239,7 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); -extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, - unsigned int, u32); +void nlm_host_rebooted(const struct nlm_reboot *); /* * Host monitoring -- cgit v1.2.3 From 576df4634e37e46b441fefb91915184edb13bb94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:39 -0500 Subject: NLM: Decode "priv" argument of NLMPROC_SM_NOTIFY as an opaque The NLM XDR decoders for the NLMPROC_SM_NOTIFY procedure should treat their "priv" argument truly as an opaque, as defined by the protocol, and let the upper layers figure out what is in it. This will make it easier to modify the contents and interpretation of the "priv" argument, and keep knowledge about what's in "priv" local to fs/lockd/mon.c. For now, the NLM and NSM implementations should behave exactly as they did before. The formation of the address of the rebooted host in nlm_host_rebooted() may look a little strange, but it is the inverse of how nsm_init_private() forms the private cookie. Plus, it's going away soon anyway. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/xdr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6b5199263858..6338866222a8 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -83,10 +83,10 @@ struct nlm_res { * statd callback when client has rebooted */ struct nlm_reboot { - char * mon; - unsigned int len; - u32 state; - __be32 addr; + char *mon; + unsigned int len; + u32 state; + struct nsm_private priv; }; /* -- cgit v1.2.3 From 3420a8c4359a189f7d854ed7075d151257415447 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:03:46 -0500 Subject: NSM: Add nsm_lookup() function Introduce a new API to fs/lockd/mon.c that allows nlm_host_rebooted() to look up nsm_handles via the contents of an nlm_reboot struct. The new function is equivalent to calling nsm_find() with @create set to zero, but it takes a struct nlm_reboot instead of separate arguments. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 2a3533ea38dd..5e3ad926de89 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -251,6 +251,7 @@ struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len, const int create); +struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.3 From 92fd91b998a5216a6d6606704e71d541a180216c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Dec 2008 19:04:01 -0500 Subject: NLM: Remove "create" argument from nsm_find() Clean up: nsm_find() now has only one caller, and that caller unconditionally sets the @create argument. Thus the @create argument is no longer needed.
Since nsm_find() now has a more specific purpose, pick a more appropriate name for it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 5e3ad926de89..1ccd49e97a7f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -247,10 +247,10 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen, +struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, + const size_t salen, const char *hostname, - const size_t hostname_len, - const int create); + const size_t hostname_len); struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); -- cgit v1.2.3 From e6765b83977f07983c7a10e6bbb19d6c7bbfc3a4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:14 -0500 Subject: NSM: Remove include/linux/lockd/sm_inter.h Clean up: The include/linux/lockd/sm_inter.h header is nearly empty now. Remove it. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 1 + include/linux/lockd/sm_inter.h | 16 ---------------- include/linux/lockd/xdr.h | 1 + 3 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 include/linux/lockd/sm_inter.h (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1ccd49e97a7f..8b57467375cc 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -205,6 +205,7 @@ extern struct svc_procedure nlmsvc_procedures4[]; extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; extern int nsm_use_hostnames; +extern int nsm_local_state; /* * Lockd client functions diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h deleted file mode 100644 index 5cef5a79dd94..000000000000 --- a/include/linux/lockd/sm_inter.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * linux/include/linux/lockd/sm_inter.h - * - * Declarations for the kernel statd client. - * - * Copyright (C) 1996, Olaf Kirch - */ - -#ifndef LINUX_LOCKD_SM_INTER_H -#define LINUX_LOCKD_SM_INTER_H - -#define SM_MAXSTRLEN 1024 - -extern int nsm_local_state; - -#endif /* LINUX_LOCKD_SM_INTER_H */ diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 6338866222a8..7dc5b6cb44cd 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,7 @@ #include #include +#define SM_MAXSTRLEN 1024 #define SM_PRIV_SIZE 16 struct nsm_private { -- cgit v1.2.3 From 8529bc51d30b8f001734b29b21a51b579c260f5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:22 -0500 Subject: NSM: Move nsm_addr() to fs/lockd/mon.c Clean up: nsm_addr_in() is no longer used, and nsm_addr() is used only in fs/lockd/mon.c, so move it there. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 8b57467375cc..6ab0449bc828 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -112,16 +112,6 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) return (struct sockaddr *)&host->h_srcaddr; } -static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle) -{ - return (struct sockaddr_in *)&handle->sm_addr; -} - -static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle) -{ - return (struct sockaddr *)&handle->sm_addr; -} - /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.3 From d1208f70738c91f13b4eadb1b7a694082e439da2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:44 -0500 Subject: NLM: nlm_privileged_requester() doesn't recognize mapped loopback address Commit b85e4676 added the nlm_privileged_requester() helper to check whether an RPC request was sent from a local privileged caller. It recognizes IPv4 privileged callers (from "127.0.0.1"), and IPv6 privileged callers (from "::1"). However, IPV6_ADDR_LOOPBACK is not set for the mapped IPv4 loopback address (::ffff:7f00:0001), so the test breaks when the kernel's RPC service is IPv6-enabled but user space is calling via the IPv4 loopback address. This is actually the most common case for IPv6- enabled RPC services on Linux. Rewrite the IPv6 check to handle the mapped IPv4 loopback address as well as a normal IPv6 loopback address. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 6ab0449bc828..80d7e8a8257d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -299,8 +299,14 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap) static inline int __nlm_privileged_request6(const struct sockaddr *sap) { const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) && - (ntohs(sin6->sin6_port) < 1024); + + if (ntohs(sin6->sin6_port) > 1023) + return 0; + + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]); + + return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; } #else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ static inline int __nlm_privileged_request6(const struct sockaddr *sap) -- cgit v1.2.3 From 57ef692588bc225853ca3267ca5b7cea2b07e058 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Dec 2008 17:56:52 -0500 Subject: NLM: Rewrite IPv4 privileged requester's check Clean up. For consistency, rewrite the IPv4 check to match the same style as the new IPv6 check. Note that ipv4_is_loopback() is somewhat broader in its interpretation of what is a loopback address than simply "127.0.0.1". Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 80d7e8a8257d..aa6fe7026de7 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -291,8 +291,11 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) static inline int __nlm_privileged_request4(const struct sockaddr *sap) { const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) && - (ntohs(sin->sin_port) < 1024); + + if (ntohs(sin->sin_port) > 1023) + return 0; + + return ipv4_is_loopback(sin->sin_addr.s_addr); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -- cgit v1.2.3 From 6f49a57aa5a0c6d4e4e27c85f7af6c83325a12d1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: up-level reference counting to the module level Simply, if a client wants any dmaengine channel then prevent all dmaengine modules from being removed. Once the clients are done re-enable module removal. Why?, beyond reducing complication: 1/ Tracking reference counts per-transaction in an efficient manner, as is currently done, requires a complicated scheme to avoid cache-line bouncing effects. 2/ Per-transaction ref-counting gives the false impression that a dma-driver can be gracefully removed ahead of its user (net, md, or dma-slave) 3/ None of the in-tree dma-drivers talk to hot pluggable hardware, but if such an engine were built one day we still would not need to notify clients of remove events. The driver can simply return NULL to a ->prep() request, something that is much easier for a client to handle. 
Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e4ec7e7b8056..d18d37d1015d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -165,7 +165,6 @@ struct dma_slave { */ struct dma_chan_percpu { - local_t refcount; /* stats */ unsigned long memcpy_count; unsigned long bytes_transferred; @@ -205,26 +204,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -static inline void dma_chan_get(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_get(&chan->refcount); - else { - local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - -static inline void dma_chan_put(struct dma_chan *chan) -{ - if (unlikely(chan->slow_ref)) - kref_put(&chan->refcount, dma_chan_cleanup); - else { - local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); - put_cpu(); - } -} - /* * typedef dma_event_callback - function pointer to a DMA event callback * For each channel added to the system this routine is called for each client. -- cgit v1.2.3 From bec085134e446577a983f17f57d642a88d1af53b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: centralize channel allocation, introduce dma_find_channel Allowing multiple clients to each define their own channel allocation scheme quickly leads to a pathological situation. For memory-to-memory offload all clients can share a central allocator. 
This simply moves the existing async_tx allocator to dmaengine with minimal fixups: * async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan * async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance * split out common code from async_tx.c:__async_tx_find_channel --> dma_find_channel Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d18d37d1015d..b466f02e2433 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -182,6 +182,7 @@ struct dma_chan_percpu { * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel + * @table_count: number of appearances in the mem-to-mem allocation table */ struct dma_chan { struct dma_device *device; @@ -198,6 +199,7 @@ struct dma_chan { struct list_head device_node; struct dma_chan_percpu *local; int client_count; + int table_count; }; #define to_dma_chan(p) container_of(p, struct dma_chan, dev) @@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); +struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: provide a common 'issue_pending_all' implementation async_tx and net_dma each have open-coded versions of issue_pending_all, so provide a common routine in dmaengine. 
The implementation needs to walk the global device list, so implement rcu to allow dma_issue_pending_all to run lockless. Clients protect themselves from channel removal events by holding a dmaengine reference. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 1c816775f135..45f6297821bd 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -59,7 +59,7 @@ enum async_tx_flags { }; #ifdef CONFIG_DMA_ENGINE -void async_tx_issue_pending_all(void); +#define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index b466f02e2433..57a43adfc39e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -471,6 +471,7 @@ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); +void dma_issue_pending_all(void); /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From f67b45999205164958de4ec0658d51fa4bee066d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: net_dma: convert to dma_find_channel Use the general-purpose channel allocation provided by dmaengine. 
Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41e1224651cf..bac2c458d9b8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1113,9 +1113,6 @@ struct softnet_data struct sk_buff *completion_queue; struct napi_struct backlog; -#ifdef CONFIG_NET_DMA - struct dma_chan *net_dma; -#endif }; DECLARE_PER_CPU(struct softnet_data,softnet_data); -- cgit v1.2.3 From 59b5ec21446b9239d706ab237fb261d525b75e81 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmaengine: introduce dma_request_channel and private channels This interface is primarily for device-to-memory clients which need to search for dma channels with platform-specific characteristics. The prototype is: struct dma_chan *dma_request_channel(dma_cap_mask_t mask, dma_filter_fn filter_fn, void *filter_param); When the optional 'filter_fn' parameter is set to NULL dma_request_channel simply returns the first channel that satisfies the capability mask. Otherwise, when the mask parameter is insufficient for specifying the necessary channel, the filter_fn routine can be used to disposition the available channels in the system. The filter_fn routine is called once for each free channel in the system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags that channel to be the return value from dma_request_channel. A channel allocated via this interface is exclusive to the caller, until dma_release_channel() is called. To ensure that all channels are not consumed by the general-purpose allocator the DMA_PRIVATE capability is provided to exclude a dma_device from general-purpose (memory-to-memory) consideration. 
Reviewed-by: Andrew Morton Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 57a43adfc39e..fe40bc020af6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -89,6 +89,7 @@ enum dma_transaction_type { DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, + DMA_PRIVATE, DMA_SLAVE, }; @@ -223,6 +224,18 @@ struct dma_client; typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, struct dma_chan *chan, enum dma_state state); +/** + * typedef dma_filter_fn - callback filter for dma_request_channel + * @chan: channel to be reviewed + * @filter_param: opaque parameter passed through dma_request_channel + * + * When this optional parameter is specified in a call to dma_request_channel a + * suitable channel is passed to this routine for further dispositioning before + * being returned. Where 'suitable' indicates a non-busy channel that + * satisfies the given capability mask. 
+ */ +typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens @@ -472,6 +485,9 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); void dma_issue_pending_all(void); +#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); +void dma_release_channel(struct dma_chan *chan); /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From 33df8ca068123457db56c316946a3c0e4ef787d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: dmatest: convert to dma_request_channel Replace the client registration infrastructure with a custom loop to poll for channels. Once dma_request_channel returns NULL stop asking for channels. A userspace side effect of this change is that loading the dmatest module before loading a dma driver will result in no channels being found, previously dmatest would get a callback. To facilitate testing in the built-in case dmatest_init is marked as a late_initcall. Another side effect is that channels under test can not be used for any other purpose. 
Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index fe40bc020af6..6f2d070ac7f3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -400,6 +400,12 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) set_bit(tx_type, dstp->bits); } +#define dma_cap_zero(mask) __dma_cap_zero(&(mask)) +static inline void __dma_cap_zero(dma_cap_mask_t *dstp) +{ + bitmap_zero(dstp->bits, DMA_TX_TYPE_END); +} + #define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) static inline int __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) -- cgit v1.2.3 From 74465b4ff9ac1da503025c0a0042e023bfa6505c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:16 -0700 Subject: atmel-mci: convert to dma_request_channel and down-level dma_slave dma_request_channel provides an exclusive channel, so we no longer need to pass slave data through dmaengine. Cc: Haavard Skinnemoen Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 38 -------------------------------------- include/linux/dw_dmac.h | 31 +++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6f2d070ac7f3..d63544cf8a1a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -96,17 +96,6 @@ enum dma_transaction_type { /* last transaction type for creation of the capabilities mask */ #define DMA_TX_TYPE_END (DMA_SLAVE + 1) -/** - * enum dma_slave_width - DMA slave register access width. 
- * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses - * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses - * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses - */ -enum dma_slave_width { - DMA_SLAVE_WIDTH_8BIT, - DMA_SLAVE_WIDTH_16BIT, - DMA_SLAVE_WIDTH_32BIT, -}; /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, @@ -132,32 +121,6 @@ enum dma_ctrl_flags { */ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; -/** - * struct dma_slave - Information about a DMA slave - * @dev: device acting as DMA slave - * @dma_dev: required DMA master device. If non-NULL, the client can not be - * bound to other masters than this. - * @tx_reg: physical address of data register used for - * memory-to-peripheral transfers - * @rx_reg: physical address of data register used for - * peripheral-to-memory transfers - * @reg_width: peripheral register width - * - * If dma_dev is non-NULL, the client can not be bound to other DMA - * masters than the one corresponding to this device. The DMA master - * driver may use this to determine if there is controller-specific - * data wrapped around this struct. Drivers of platform code that sets - * the dma_dev field must therefore make sure to use an appropriate - * controller-specific dma slave structure wrapping this struct. 
- */ -struct dma_slave { - struct device *dev; - struct device *dma_dev; - dma_addr_t tx_reg; - dma_addr_t rx_reg; - enum dma_slave_width reg_width; -}; - /** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting @@ -248,7 +211,6 @@ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filt struct dma_client { dma_event_callback event_callback; dma_cap_mask_t cap_mask; - struct dma_slave *slave; struct list_head global_node; }; diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 04d217b442bf..d797dde247f7 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -21,15 +21,35 @@ struct dw_dma_platform_data { unsigned int nr_channels; }; +/** + * enum dw_dma_slave_width - DMA slave register access width. + * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses + * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses + * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses + */ +enum dw_dma_slave_width { + DW_DMA_SLAVE_WIDTH_8BIT, + DW_DMA_SLAVE_WIDTH_16BIT, + DW_DMA_SLAVE_WIDTH_32BIT, +}; + /** * struct dw_dma_slave - Controller-specific information about a slave - * @slave: Generic information about the slave - * @ctl_lo: Platform-specific initializer for the CTL_LO register + * + * @dma_dev: required DMA master device + * @tx_reg: physical address of data register used for + * memory-to-peripheral transfers + * @rx_reg: physical address of data register used for + * peripheral-to-memory transfers + * @reg_width: peripheral register width * @cfg_hi: Platform-specific initializer for the CFG_HI register * @cfg_lo: Platform-specific initializer for the CFG_LO register */ struct dw_dma_slave { - struct dma_slave slave; + struct device *dma_dev; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum dw_dma_slave_width reg_width; u32 cfg_hi; u32 cfg_lo; }; @@ -54,9 +74,4 @@ struct dw_dma_slave { #define DWC_CFGL_HS_DST_POL (1 << 18) /* dst 
handshake active low */ #define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ -static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave) -{ - return container_of(slave, struct dw_dma_slave, slave); -} - #endif /* DW_DMAC_H */ -- cgit v1.2.3 From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: replace dma_async_client_register with dmaengine_get Now that clients no longer need to be notified of channel arrival dma_async_client_register can simply increment the dmaengine_ref_count. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d63544cf8a1a..37d95db156d3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -318,8 +318,8 @@ struct dma_device { /* --- public DMA engine API --- */ -void dma_async_client_register(struct dma_client *client); -void dma_async_client_unregister(struct dma_client *client); +void dmaengine_get(void); +void dmaengine_put(void); void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.3 From aa1e6f1a385eb2b04171ec841f3b760091e4a8ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: kill struct dma_client and supporting infrastructure All users have been converted to either the general-purpose allocator, dma_find_channel, or dma_request_channel. 
Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 50 +---------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 37d95db156d3..db050e97d2b4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,20 +28,6 @@ #include #include -/** - * enum dma_state - resource PNP/power management state - * @DMA_RESOURCE_SUSPEND: DMA device going into low power state - * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_AVAILABLE: DMA device available to the system - * @DMA_RESOURCE_REMOVED: DMA device removed from the system - */ -enum dma_state { - DMA_RESOURCE_SUSPEND, - DMA_RESOURCE_RESUME, - DMA_RESOURCE_AVAILABLE, - DMA_RESOURCE_REMOVED, -}; - /** * enum dma_state_client - state of the channel in the client * @DMA_ACK: client would like to use, or was using this channel @@ -170,23 +156,6 @@ struct dma_chan { void dma_chan_cleanup(struct kref *kref); -/* - * typedef dma_event_callback - function pointer to a DMA event callback - * For each channel added to the system this routine is called for each client. - * If the client would like to use the channel it returns '1' to signal (ack) - * the dmaengine core to take out a reference on the channel and its - * corresponding device. A client must not 'ack' an available channel more - * than once. When a channel is removed all clients are notified. If a client - * is using the channel it must 'ack' the removal. A client must not 'ack' a - * removed channel more than once. 
- * @client - 'this' pointer for the client context - * @chan - channel to be acted upon - * @state - available or removed - */ -struct dma_client; -typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_state state); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed @@ -199,21 +168,6 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, */ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); -/** - * struct dma_client - info on the entity making use of DMA services - * @event_callback: func ptr to call when something happens - * @cap_mask: only return channels that satisfy the requested capabilities - * a value of zero corresponds to any capability - * @slave: data for preparing slave transfer. Must be non-NULL iff the - * DMA_SLAVE capability is requested. - * @global_node: list_head for global dma_client_list - */ -struct dma_client { - dma_event_callback event_callback; - dma_cap_mask_t cap_mask; - struct list_head global_node; -}; - typedef void (*dma_async_tx_callback)(void *dma_async_param); /** * struct dma_async_tx_descriptor - async transaction descriptor @@ -285,8 +239,7 @@ struct dma_device { int dev_id; struct device *dev; - int (*device_alloc_chan_resources)(struct dma_chan *chan, - struct dma_client *client); + int (*device_alloc_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( @@ -320,7 +273,6 @@ struct dma_device { void dmaengine_get(void); void dmaengine_put(void); -void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, -- cgit v1.2.3 From f27c580c3628d79b17f38976d842a6d7f3616e2e Mon Sep 17 
00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:18 -0700 Subject: dmaengine: remove 'bigref' infrastructure Reference counting is done at the module level so clients need not worry that a channel will leave while they are actively using dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db050e97d2b4..bca2fc758894 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -142,10 +142,6 @@ struct dma_chan { int chan_id; struct device dev; - struct kref refcount; - int slow_ref; - struct rcu_head rcu; - struct list_head device_node; struct dma_chan_percpu *local; int client_count; @@ -233,9 +229,6 @@ struct dma_device { dma_cap_mask_t cap_mask; int max_xor; - struct kref refcount; - struct completion done; - int dev_id; struct device *dev; -- cgit v1.2.3 From 7dd602510128d7a64b11ff3b7d4f30ac8e3946ce Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:19 -0700 Subject: dmaengine: kill enum dma_state_client DMA_NAK is now useless. We can just use a bool instead. 
Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index bca2fc758894..1419a5094478 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -28,18 +28,6 @@ #include #include -/** - * enum dma_state_client - state of the channel in the client - * @DMA_ACK: client would like to use, or was using this channel - * @DMA_DUP: client has already seen this channel, or is not using this channel - * @DMA_NAK: client does not want to see any more channels - */ -enum dma_state_client { - DMA_ACK, - DMA_DUP, - DMA_NAK, -}; - /** * typedef dma_cookie_t - an opaque DMA cookie * @@ -160,9 +148,10 @@ void dma_chan_cleanup(struct kref *kref); * When this optional parameter is specified in a call to dma_request_channel a * suitable channel is passed to this routine for further dispositioning before * being returned. Where 'suitable' indicates a non-busy channel that - * satisfies the given capability mask. + * satisfies the given capability mask. It returns 'true' to indicate that the + * channel is suitable. */ -typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); /** -- cgit v1.2.3 From 41d5e59c1299f27983977bcfe3b360600996051c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: add a release for dma class devices and dependent infrastructure Resolves: WARNING: at drivers/base/core.c:122 device_release+0x4d/0x52() Device 'dma0chan0' does not have a release() function, it is broken and must be fixed. The dma_chan_dev object is introduced to gear-match sysfs kobject and dmaengine channel lifetimes. 
When a channel is removed access to the sysfs entries return -ENODEV until the kobject can be released. The bulk of the change is updates to existing code to handle the extra layer of indirection between a dma_chan and its struct device. Reported-by: Alexander Beregalov Acked-by: Stephen Hemminger Cc: Haavard Skinnemoen Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1419a5094478..d6b6bff355f4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -113,7 +113,7 @@ struct dma_chan_percpu { * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs - * @class_dev: class device for sysfs + * @dev: class device for sysfs * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head @@ -128,7 +128,7 @@ struct dma_chan { /* sysfs */ int chan_id; - struct device dev; + struct dma_chan_dev *dev; struct list_head device_node; struct dma_chan_percpu *local; @@ -136,7 +136,20 @@ struct dma_chan { int table_count; }; -#define to_dma_chan(p) container_of(p, struct dma_chan, dev) +/** + * struct dma_chan_dev - relate sysfs device node to backing channel device + * @chan - driver channel device + * @device - sysfs device + */ +struct dma_chan_dev { + struct dma_chan *chan; + struct device device; +}; + +static inline const char *dma_chan_name(struct dma_chan *chan) +{ + return dev_name(&chan->dev->device); +} void dma_chan_cleanup(struct kref *kref); -- cgit v1.2.3 From 864498aaa9fef69ee166da023d12413a7776342d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:21 -0700 Subject: dmaengine: use idr for registering dma device numbers This brings some predictability to dma device numbers, i.e. 
an rmmod/insmod cycle may now result in /sys/class/dma/dma0chan0 being restored rather than /sys/class/dma/dma1chan0 appearing. Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d6b6bff355f4..64dea2ab326c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -140,10 +140,14 @@ struct dma_chan { * struct dma_chan_dev - relate sysfs device node to backing channel device * @chan - driver channel device * @device - sysfs device + * @dev_id - parent dma_device dev_id + * @idr_ref - reference count to gate release of dma_device dev_id */ struct dma_chan_dev { struct dma_chan *chan; struct device device; + int dev_id; + atomic_t *idr_ref; }; static inline const char *dma_chan_name(struct dma_chan *chan) -- cgit v1.2.3 From adf094931ffb25ef4b381559918f1a34181a5273 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 22:46:05 +0200 Subject: PM: Simplify the new suspend/hibernation framework for devices PM: Simplify the new suspend/hibernation framework for devices Following the discussion at the Kernel Summit, simplify the new device PM framework by merging 'struct pm_ops' and 'struct pm_ext_ops' and removing pointers to 'struct pm_ext_ops' from 'struct platform_driver' and 'struct pci_driver'. After this change, the suspend/hibernation callbacks will only reside in 'struct device_driver' as well as at the bus type/ device class/device type level. Accordingly, PCI and platform device drivers are now expected to put their suspend/hibernation callbacks into the 'struct device_driver' embedded in 'struct pci_driver' or 'struct platform_driver', respectively. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Cc: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++--- include/linux/pci.h | 1 - include/linux/platform_device.h | 1 - include/linux/pm.h | 76 ++++++++++++++--------------------------- 4 files changed, 29 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1a3686d15f98..4a520051c315 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -65,7 +65,7 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); - struct pm_ext_ops *pm; + struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -133,7 +133,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct pm_ops *pm; + struct dev_pm_ops *pm; struct driver_private *p; }; @@ -198,7 +198,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; struct class_private *p; }; @@ -291,7 +291,7 @@ struct device_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 03b0b8c3c81b..4bb156ba854a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -421,7 +421,6 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 4b8cc6a32479..9a342699c607 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -55,7 +55,6 @@ struct platform_driver { int 
(*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); - struct pm_ext_ops *pm; struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 42de4003c4ee..5785666d0cc4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,7 +41,7 @@ typedef struct pm_message { } pm_message_t; /** - * struct pm_ops - device PM callbacks + * struct dev_pm_ops - device PM callbacks * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) @@ -126,46 +126,6 @@ typedef struct pm_message { * On most platforms, there are no restrictions on availability of * resources like clocks during @restore(). * - * All of the above callbacks, except for @complete(), return error codes. - * However, the error codes returned by the resume operations, @resume(), - * @thaw(), and @restore(), do not cause the PM core to abort the resume - * transition during which they are returned. The error codes returned in - * that cases are only printed by the PM core to the system logs for debugging - * purposes. Still, it is recommended that drivers only return error codes - * from their resume methods in case of an unrecoverable failure (i.e. when the - * device being handled refuses to resume and becomes unusable) to allow us to - * modify the PM core in the future, so that it can avoid attempting to handle - * devices that failed to resume and their children. - * - * It is allowed to unregister devices while the above callbacks are being - * executed. However, it is not allowed to unregister a device from within any - * of its own callbacks. 
- */ - -struct pm_ops { - int (*prepare)(struct device *dev); - void (*complete)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*thaw)(struct device *dev); - int (*poweroff)(struct device *dev); - int (*restore)(struct device *dev); -}; - -/** - * struct pm_ext_ops - extended device PM callbacks - * - * Some devices require certain operations related to suspend and hibernation - * to be carried out with interrupts disabled. Thus, 'struct pm_ext_ops' below - * is defined, adding callbacks to be executed with interrupts disabled to - * 'struct pm_ops'. - * - * The following callbacks included in 'struct pm_ext_ops' are executed with - * the nonboot CPUs switched off and with interrupts disabled on the only - * functional CPU. They also are executed with the PM core list of devices - * locked, so they must NOT unregister any devices. - * * @suspend_noirq: Complete the operations of ->suspend() by carrying out any * actions required for suspending the device that need interrupts to be * disabled @@ -190,18 +150,32 @@ struct pm_ops { * actions required for restoring the operations of the device that need * interrupts to be disabled * - * All of the above callbacks return error codes, but the error codes returned - * by the resume operations, @resume_noirq(), @thaw_noirq(), and - * @restore_noirq(), do not cause the PM core to abort the resume transition - * during which they are returned. The error codes returned in that cases are - * only printed by the PM core to the system logs for debugging purposes. - * Still, as stated above, it is recommended that drivers only return error - * codes from their resume methods if the device being handled fails to resume - * and is not usable any more. + * All of the above callbacks, except for @complete(), return error codes. 
+ * However, the error codes returned by the resume operations, @resume(), + * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do + * not cause the PM core to abort the resume transition during which they are + * returned. The error codes returned in that cases are only printed by the PM + * core to the system logs for debugging purposes. Still, it is recommended + * that drivers only return error codes from their resume methods in case of an + * unrecoverable failure (i.e. when the device being handled refuses to resume + * and becomes unusable) to allow us to modify the PM core in the future, so + * that it can avoid attempting to handle devices that failed to resume and + * their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks. */ -struct pm_ext_ops { - struct pm_ops base; +struct dev_pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); int (*suspend_noirq)(struct device *dev); int (*resume_noirq)(struct device *dev); int (*freeze_noirq)(struct device *dev); -- cgit v1.2.3 From 7f4f5d4516b441d712fa0ffe5380618fb7fc545e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 17 Nov 2008 11:14:19 -0500 Subject: Fix misspellings in pm.h macros This patch (as1167) fixes some misspellings in various recently-added macros in pm.h. Fortunately these macros are not yet used anywhere. Signed-off-by: Alan Stern Acked-by: Rafael J. 
Wysocki --- include/linux/pm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 5785666d0cc4..de2e0a8f6728 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -252,7 +252,7 @@ struct dev_pm_ops { #define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) #define PM_EVENT_USER_SUSPEND (PM_EVENT_USER | PM_EVENT_SUSPEND) #define PM_EVENT_USER_RESUME (PM_EVENT_USER | PM_EVENT_RESUME) -#define PM_EVENT_REMOTE_WAKEUP (PM_EVENT_REMOTE | PM_EVENT_RESUME) +#define PM_EVENT_REMOTE_RESUME (PM_EVENT_REMOTE | PM_EVENT_RESUME) #define PM_EVENT_AUTO_SUSPEND (PM_EVENT_AUTO | PM_EVENT_SUSPEND) #define PM_EVENT_AUTO_RESUME (PM_EVENT_AUTO | PM_EVENT_RESUME) @@ -265,15 +265,15 @@ struct dev_pm_ops { #define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, }) #define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, }) #define PMSG_RECOVER ((struct pm_message){ .event = PM_EVENT_RECOVER, }) -#define PMSG_USER_SUSPEND ((struct pm_messge) \ +#define PMSG_USER_SUSPEND ((struct pm_message) \ { .event = PM_EVENT_USER_SUSPEND, }) -#define PMSG_USER_RESUME ((struct pm_messge) \ +#define PMSG_USER_RESUME ((struct pm_message) \ { .event = PM_EVENT_USER_RESUME, }) -#define PMSG_REMOTE_RESUME ((struct pm_messge) \ +#define PMSG_REMOTE_RESUME ((struct pm_message) \ { .event = PM_EVENT_REMOTE_RESUME, }) -#define PMSG_AUTO_SUSPEND ((struct pm_messge) \ +#define PMSG_AUTO_SUSPEND ((struct pm_message) \ { .event = PM_EVENT_AUTO_SUSPEND, }) -#define PMSG_AUTO_RESUME ((struct pm_messge) \ +#define PMSG_AUTO_RESUME ((struct pm_message) \ { .event = PM_EVENT_AUTO_RESUME, }) /** -- cgit v1.2.3 From 929d2fa5955ab27aa21fac615b23e0e92e8dc3a0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 16 Oct 2008 15:51:35 -0600 Subject: driver core: Rearrange struct device for better packing This minor rearrangement saves 16 bytes from sizeof(struct device) according to pahole. 
Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4a520051c315..4e14fad41430 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -373,9 +373,9 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + unsigned uevent_suppress:1; const char *init_name; /* initial name of the device */ struct device_type *type; - unsigned uevent_suppress:1; struct semaphore sem; /* semaphore to synchronize calls to * its driver. @@ -408,12 +408,13 @@ struct device { /* arch specific additions */ struct dev_archdata archdata; + dev_t devt; /* dev_t, creates the sysfs "dev" */ + spinlock_t devres_lock; struct list_head devres_head; struct klist_node knode_class; struct class *class; - dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ void (*release)(struct device *dev); -- cgit v1.2.3 From 210272a28465a7a31bcd580d2f9529f924965aa5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 16 Oct 2008 14:57:54 -0600 Subject: driver core: Remove completion from struct klist_node Removing the completion from klist_node reduces its size from 64 bytes to 28 on x86-64. To maintain the semantics of klist_remove(), we add a single list of klist nodes which are pending deletion and scan them. 
Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- include/linux/klist.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index 8ea98db223e5..d5a27af9dba5 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -13,7 +13,6 @@ #define _LINUX_KLIST_H #include <linux/spinlock.h> -#include <linux/completion.h> #include <linux/kref.h> #include <linux/list.h> @@ -41,7 +40,6 @@ struct klist_node { void *n_klist; /* never access directly */ struct list_head n_node; struct kref n_ref; - struct completion n_removed; }; extern void klist_add_tail(struct klist_node *n, struct klist *k); -- cgit v1.2.3 From 2831fe6f9cc4e16c103504ee09a47a084297c0f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:23:36 -0800 Subject: driver core: create a private portion of struct device This is to be used to move things out of struct device that no code outside of the driver core should ever touch. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4e14fad41430..d6d34084fd37 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -28,6 +28,7 @@ #define BUS_ID_SIZE 20 struct device; +struct device_private; struct device_driver; struct driver_private; struct class; @@ -371,6 +372,8 @@ struct device { struct klist_node knode_bus; struct device *parent; + struct device_private *p; + struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; -- cgit v1.2.3 From 11c3b5c3e08f4d855cbef52883c266b9ab9df879 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:24:56 -0800 Subject: driver core: move klist_children into private structure Nothing outside of the driver core should ever touch klist_children, or knode_parent, so move them out of the public eye.
Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d6d34084fd37..60423e687205 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,8 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist klist_children; - struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.3 From 93e746db183b3bdbbda67900f79b5835f9cb388f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:25:49 -0800 Subject: driver core: move knode_driver into private structure Nothing outside of the driver core should ever touch knode_driver, so move it out of the public eye. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 60423e687205..e3630222c3c1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,7 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.3 From b9daa99ee533578e3f88231e7a16784dcb44ec42 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2008 12:26:21 -0800 Subject: driver core: move knode_bus into private structure Nothing outside of the driver core should ever touch knode_bus, so move it out of the public eye. 
Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e3630222c3c1..e21b5d69d67c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,7 +366,6 @@ struct device_dma_parameters { }; struct device { - struct klist_node knode_bus; struct device *parent; struct device_private *p; -- cgit v1.2.3 From d0d85ff989222f08dd1fa66321fef5567bbc4a7b Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 4 Dec 2008 16:55:47 +0100 Subject: Make DEBUG take precedence over DYNAMIC_PRINTK_DEBUG Statically defined DEBUG should take precedence over dynamically enabled debugging; otherwise adding DEBUG (like, for example, via CONFIG_DEBUG_KOBJECT) does not have the expected result of printing pr_debug() and dev_dbg() messages unconditionally. Signed-off-by: Cornelia Huck Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++++---- include/linux/kernel.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e21b5d69d67c..b97a0cf1eb05 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -553,13 +553,13 @@ extern const char *dev_driver_string(const struct device *dev); #define dev_info(dev, format, arg...) \ dev_printk(KERN_INFO , dev , format , ## arg) -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) +#define dev_dbg(dev, format, arg...) \ + dev_printk(KERN_DEBUG , dev , format , ## arg) +#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) #define dev_dbg(dev, format, ...) do { \ dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ } while (0) -#elif defined(DEBUG) -#define dev_dbg(dev, format, arg...) \ - dev_printk(KERN_DEBUG , dev , format , ## arg) #else #define dev_dbg(dev, format, arg...) 
\ ({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; }) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ca9ff6411dfa..d242fe1381fd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -349,13 +349,13 @@ static inline char *pack_hex_byte(char *buf, u8 byte) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) +#define pr_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) #define pr_debug(fmt, ...) do { \ dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) -#elif defined(DEBUG) -#define pr_debug(fmt, ...) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) -- cgit v1.2.3 From 0aa0dc41bfd993491c2344870eee7a3b218551fb Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 15 Dec 2008 12:58:26 +0000 Subject: driver core: add root_device_register() Add support for allocating root device objects which group device objects under /sys/devices directories. Also add a sysfs 'module' symlink which points to the owner of the root device object. This symlink will be used in virtio to allow userspace to determine which virtio bus implementation a given device is associated with. 
[Includes suggestions from Cornelia Huck] Signed-off-by: Mark McLoughlin Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b97a0cf1eb05..7d9da4b4993f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -482,6 +482,17 @@ extern struct device *device_find_child(struct device *dev, void *data, extern int device_rename(struct device *dev, char *new_name); extern int device_move(struct device *dev, struct device *new_parent); +/* + * Root device objects for grouping under /sys/devices + */ +extern struct device *__root_device_register(const char *name, + struct module *owner); +static inline struct device *root_device_register(const char *name) +{ + return __root_device_register(name, THIS_MODULE); +} +extern void root_device_unregister(struct device *root); + /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. 
-- cgit v1.2.3 From 475b44c19913b877537c8bc19799f75b0b142641 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 6 Jan 2009 10:44:38 -0800 Subject: mtd: struct device - replace bus_id with dev_name(), dev_set_name() CC: David Woodhouse Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c02f3d264ecf..e80c674daeb3 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -13,7 +13,7 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ - char *name); /* name for the new device */ + const char *name); /* name for the new device */ void mtd_concat_destroy(struct mtd_info *mtd); -- cgit v1.2.3 From e70c412ee45332db2636a8f5a35a0685efb0e4aa Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Sat, 6 Dec 2008 02:23:13 +0100 Subject: UIO: Pass information about ioports to userspace (V2) Devices sometimes have memory where all or parts of it can not be mapped to userspace. But it might still be possible to access this memory from userspace by other means. An example are PCI cards that advertise not only mappable memory but also ioport ranges. On x86 architectures, these can be accessed with ioperm, iopl, inb, outb, and friends. Mike Frysinger (CCed) reported a similar problem on Blackfin arch where it doesn't seem to be easy to mmap non-cached memory but it can still be accessed from userspace. This patch allows kernel drivers to pass information about such ports to userspace. Similar to the existing mem[] array, it adds a port[] array to struct uio_info. Each port range is described by start, size, and porttype. 
If a driver fills in at least one such port range, the UIO core will simply pass this information to userspace by creating a new directory "portio" underneath /sys/class/uio/uioN/. Similar to the "mem" directory, it will contain a subdirectory (portX) for each port range given. Note that UIO simply passes this information to userspace, it performs no action whatsoever with this data. It's userspace's responsibility to obtain access to these ports and to solve arch dependent issues. The "porttype" attribute tells userspace what kind of port it is dealing with. This mechanism could also be used to give userspace information about GPIOs related to a device. You frequently find such hardware in embedded devices, so I added a UIO_PORT_GPIO definition. I'm not really sure if this is a good idea since there are other solutions to this problem, but it won't hurt much anyway. Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index cdf338d94b7f..20be327bfbb4 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -38,6 +38,24 @@ struct uio_mem { #define MAX_UIO_MAPS 5 +struct uio_portio; + +/** + * struct uio_port - description of a UIO port region + * @start: start of port region + * @size: size of port region + * @porttype: type of port (see UIO_PORT_* below) + * @portio: for use by the UIO core only. 
+ */ +struct uio_port { + unsigned long start; + unsigned long size; + int porttype; + struct uio_portio *portio; +}; + +#define MAX_UIO_PORT_REGIONS 5 + struct uio_device; /** @@ -46,6 +64,7 @@ struct uio_device; * @name: device name * @version: device driver version * @mem: list of mappable memory regions, size==0 for end of list + * @port: list of port regions, size==0 for end of list * @irq: interrupt number or UIO_IRQ_CUSTOM * @irq_flags: flags for request_irq() * @priv: optional private data @@ -60,6 +79,7 @@ struct uio_info { char *name; char *version; struct uio_mem mem[MAX_UIO_MAPS]; + struct uio_port port[MAX_UIO_PORT_REGIONS]; long irq; unsigned long irq_flags; void *priv; @@ -92,4 +112,10 @@ extern void uio_event_notify(struct uio_info *info); #define UIO_MEM_LOGICAL 2 #define UIO_MEM_VIRTUAL 3 +/* defines for uio_port->porttype */ +#define UIO_PORT_NONE 0 +#define UIO_PORT_X86 1 +#define UIO_PORT_GPIO 2 +#define UIO_PORT_OTHER 3 + #endif /* _LINUX_UIO_DRIVER_H_ */ -- cgit v1.2.3 From b8ac9fc0e8cda9f9776019c5b0464b0c6d2d4c90 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 12 Dec 2008 11:44:21 +0100 Subject: uio: make uio_info's name and version const These are only ever assigned constant strings and never modified. This was noticed because Wolfram Sang needed to cast the result of of_get_property() in order to assign it to the name field of a struct uio_info. Signed-off-by: Stephen Rothwell Signed-off-by: Hans J. 
Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 20be327bfbb4..a0bb6bd2e5c1 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -76,8 +76,8 @@ struct uio_device; */ struct uio_info { struct uio_device *uio_dev; - char *name; - char *version; + const char *name; + const char *version; struct uio_mem mem[MAX_UIO_MAPS]; struct uio_port port[MAX_UIO_PORT_REGIONS]; long irq; -- cgit v1.2.3 From 96e93eab20337d063c70d537bd7bc70d90e04fa3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:49:34 -0800 Subject: gro: Add internal interfaces for VLAN Previously GRO's only entry point from the outside is through napi_gro_receive and napi_gro_frags. These interfaces are for device drivers. This patch rearranges things to provide a new set of interfaces for VLANs. These interfaces are for internal use only. The VLAN code itself can then provide a set of entry points for device drivers. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c28bbba3c23d..114091be8872 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1373,8 +1373,14 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); +extern int dev_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_reuse_skb(struct napi_struct *napi, + struct sk_buff *skb); +extern struct sk_buff * napi_fraginfo_skb(struct napi_struct *napi, + struct napi_gro_fraginfo *info); extern int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info); extern void netif_nit_deliver(struct sk_buff *skb); -- cgit v1.2.3 From e1c096e251e52773afeffbbcb74d0a072be47ea3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Jan 2009 10:50:09 -0800 Subject: vlan: Add GRO interfaces This patch adds GRO interfaces for hardware-assisted VLAN reception. With this in place we're now at parity with LRO as far as the interface is concerned. That is, you can now take any LRO driver and convert it over to GRO. As the CB memory clashes with GRO's use of CB, I've removed it entirely by storing dev in skb->dev. This is OK because VLAN gets called first thing in netif_receive_skb and skb->dev is not used in between us calling netif_rx and netif_receive_skb getting called. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5cb0c3f6dcf..f8ff918c208f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -115,6 +115,11 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb); +extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, + struct napi_gro_fraginfo *info); #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) @@ -140,6 +145,20 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) { return 0; } + +static inline int vlan_gro_receive(struct napi_struct *napi, + struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) +{ + return NET_RX_DROP; +} + +static inline int vlan_gro_frags(struct napi_struct *napi, + struct vlan_group *grp, unsigned int vlan_tci, + struct napi_gro_fraginfo *info) +{ + return NET_RX_DROP; +} #endif /** -- cgit v1.2.3 From 1fa17d4ba43d7e5aab5e90777b07da06524f6748 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Jan 2009 11:07:54 -0800 Subject: can: omit unneeded skb_clone() calls The AF_CAN core always delivered cloned sk_buffs to the AF_CAN protocols, although this was _only_ needed by the can-raw protocol. With this (additionally documented) change, the AF_CAN core calls the callback functions of the registered AF_CAN protocols with the original (uncloned) sk_buff pointer and lets the can-raw protocol do the skb_clone() itself which omits all unneeded skb_clone() calls for other AF_CAN protocols.
Signed-off-by: Oliver Hartkopp Signed-off-by: Urs Thuermann Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index f50785ad4781..25085cbadcfc 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20081130" +#define CAN_VERSION "20090105" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" -- cgit v1.2.3 From 29881c4502ba05f46bc12ae8053d4e08d7e2615c Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 7 Jan 2009 09:21:54 +1100 Subject: Revert "CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #2]" This reverts commit 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8. David has a better version to come. --- include/linux/capability.h | 17 ++-------------- include/linux/security.h | 49 +++++++++------------------------------------- 2 files changed, 11 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5b8a13214451..e22f48c2a46f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,21 +529,8 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. */ -#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0) - -/** - * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) - * @t: The task in question - * @cap: The capability to be tested for - * - * Return true if the specified task has the given superior capability - * currently in effect, false if not, but don't write an audit message for the - * check. - * - * Note that this does not set PF_SUPERPRIV on the task. 
- */ -#define has_capability_noaudit(t, cap) \ - (security_task_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_capable((t), (cap)) == 0) +#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 76989b8bc34f..3416cb85e77b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,9 +48,7 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(int cap, int audit); -extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred, - int cap, int audit); +extern int cap_capable(struct task_struct *tsk, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1197,18 +1195,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the current process has the @cap capability in its - * subjective/effective credentials. - * @cap contains the capability . - * @audit: Whether to write an audit message or not - * Return 0 if the capability is granted for @tsk. - * @task_capable: - * Check whether the @tsk process has the @cap capability in its - * objective/real credentials. + * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. - * @cred contains the credentials to use. * @cap contains the capability . - * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. 
If @@ -1301,9 +1290,7 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (int cap, int audit); - int (*task_capable) (struct task_struct *tsk, const struct cred *cred, - int cap, int audit); + int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1569,9 +1556,8 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(int cap); -int security_task_capable(struct task_struct *tsk, int cap); -int security_task_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(struct task_struct *tsk, int cap); +int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1768,31 +1754,14 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(int cap) +static inline int security_capable(struct task_struct *tsk, int cap) { - return cap_capable(cap, SECURITY_CAP_AUDIT); + return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); } -static inline int security_task_capable(struct task_struct *tsk, int cap) +static inline int security_capable_noaudit(struct task_struct *tsk, int cap) { - int ret; - - rcu_read_lock(); - ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); - rcu_read_unlock(); - return ret; -} - -static inline -int security_task_capable_noaudit(struct task_struct *tsk, int cap) -{ - int ret; - - rcu_read_lock(); - ret = cap_task_capable(tsk, __task_cred(tsk), cap, - SECURITY_CAP_NOAUDIT); - 
rcu_read_unlock(); - return ret; + return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); } static inline int security_acct(struct file *file) -- cgit v1.2.3 From 3699c53c485bf0168e6500d0ed18bf931584dd7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Jan 2009 22:27:01 +0000 Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3] Fix a regression in cap_capable() due to: commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Author: David Howells Date: Fri Nov 14 10:39:26 2008 +1100 CRED: Differentiate objective and effective subjective credentials on a task The problem is that the above patch allows a process to have two sets of credentials, and for the most part uses the subjective credentials when accessing current's creds. There is, however, one exception: cap_capable(), and thus capable(), uses the real/objective credentials of the target task, whether or not it is the current task. Ordinarily this doesn't matter, since usually the two cred pointers in current point to the same set of creds. However, sys_faccessat() makes use of this facility to override the credentials of the calling process to make its test, without affecting the creds as seen from other processes. One of the things sys_faccessat() does is to make an adjustment to the effective capabilities mask, which cap_capable(), as it stands, then ignores. The affected capability check is in generic_permission(): if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; This change passes the set of credentials to be tested down into the commoncap and SELinux code. The security functions called by capable() and has_capability() select the appropriate set of credentials from the process being checked. This can be tested by compiling the following program from the XFS testsuite: /* * t_access_root.c - trivial test program to show permission bug. * * Written by Michael Kerrisk - copyright ownership not pursued. 
* Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html */ #include <limits.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <sys/stat.h> #define UID 500 #define GID 100 #define PERM 0 #define TESTPATH "/tmp/t_access" static void errExit(char *msg) { perror(msg); exit(EXIT_FAILURE); } /* errExit */ static void accessTest(char *file, int mask, char *mstr) { printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask)); } /* accessTest */ int main(int argc, char *argv[]) { int fd, perm, uid, gid; char *testpath; char cmd[PATH_MAX + 20]; testpath = (argc > 1) ? argv[1] : TESTPATH; perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM; uid = (argc > 3) ? atoi(argv[3]) : UID; gid = (argc > 4) ? atoi(argv[4]) : GID; unlink(testpath); fd = open(testpath, O_RDWR | O_CREAT, 0); if (fd == -1) errExit("open"); if (fchown(fd, uid, gid) == -1) errExit("fchown"); if (fchmod(fd, perm) == -1) errExit("fchmod"); close(fd); snprintf(cmd, sizeof(cmd), "ls -l %s", testpath); system(cmd); if (seteuid(uid) == -1) errExit("seteuid"); accessTest(testpath, 0, "0"); accessTest(testpath, R_OK, "R_OK"); accessTest(testpath, W_OK, "W_OK"); accessTest(testpath, X_OK, "X_OK"); accessTest(testpath, R_OK | W_OK, "R_OK | W_OK"); accessTest(testpath, R_OK | X_OK, "R_OK | X_OK"); accessTest(testpath, W_OK | X_OK, "W_OK | X_OK"); accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK"); exit(EXIT_SUCCESS); } /* main */ This can be run against an Ext3 filesystem as well as against an XFS filesystem.
If successful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns 0 access(/tmp/xxx, W_OK) returns 0 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns 0 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 If unsuccessful, it will show: [root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043 ---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx access(/tmp/xxx, 0) returns 0 access(/tmp/xxx, R_OK) returns -1 access(/tmp/xxx, W_OK) returns -1 access(/tmp/xxx, X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK) returns -1 access(/tmp/xxx, R_OK | X_OK) returns -1 access(/tmp/xxx, W_OK | X_OK) returns -1 access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1 I've also tested the fix with the SELinux and syscalls LTP testsuites. Signed-off-by: David Howells Tested-by: J. Bruce Fields Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 17 +++++++++++++++-- include/linux/security.h | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index e22f48c2a46f..02bdb768d43b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. 
*/ -#define has_capability(t, cap) (security_capable((t), (cap)) == 0) -#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) +#define has_capability(t, cap) (security_real_capable((t), (cap)) == 0) + +/** + * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) + * @t: The task in question + * @cap: The capability to be tested for + * + * Return true if the specified task has the given superior capability + * currently in effect, false if not, but don't write an audit message for the + * check. + * + * Note that this does not set PF_SUPERPRIV on the task. + */ +#define has_capability_noaudit(t, cap) \ + (security_real_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..f9c390494f18 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -48,7 +48,8 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap, int audit); +extern int cap_capable(struct task_struct *tsk, const struct cred *cred, + int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1195,9 +1196,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @permitted contains the permitted capability set. * Return 0 and update @new if permission is granted. * @capable: - * Check whether the @tsk process has the @cap capability. + * Check whether the @tsk process has the @cap capability in the indicated + * credentials. * @tsk contains the task_struct for the process. + * @cred contains the credentials to use. * @cap contains the capability . 
+ * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If @@ -1290,7 +1294,8 @@ struct security_operations { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap, int audit); + int (*capable) (struct task_struct *tsk, const struct cred *cred, + int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1556,8 +1561,9 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(struct task_struct *tsk, int cap); -int security_capable_noaudit(struct task_struct *tsk, int cap); +int security_capable(int cap); +int security_real_capable(struct task_struct *tsk, int cap); +int security_real_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1754,14 +1760,31 @@ static inline int security_capset(struct cred *new, return cap_capset(new, old, effective, inheritable, permitted); } -static inline int security_capable(struct task_struct *tsk, int cap) +static inline int security_capable(int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); + return cap_capable(current, current_cred(), cap, SECURITY_CAP_AUDIT); } -static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +static inline int security_real_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); + int ret; + + rcu_read_lock(); + ret = cap_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT); + rcu_read_unlock(); + return ret; 
+} + +static inline +int security_real_capable_noaudit(struct task_struct *tsk, int cap) +{ + int ret; + + rcu_read_lock(); + ret = cap_capable(tsk, __task_cred(tsk), cap, + SECURITY_CAP_NOAUDIT); + rcu_read_unlock(); + return ret; } static inline int security_acct(struct file *file) -- cgit v1.2.3 From 08fba69986e20c1c9e5fe2e6064d146cc4f42480 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 6 Jan 2009 14:38:53 -0800 Subject: mm: report the pagesize backing a VMA in /proc/pid/smaps It is useful to verify a hugepage-aware application is using the expected pagesizes for its memory regions. This patch creates an entry called KernelPageSize in /proc/pid/smaps that is the size of page used by the kernel to back a VMA. The entry is not called PageSize as it is possible the MMU uses a different size. This extension should not break any sensible parser that skips lines containing unrecognised information. Signed-off-by: Mel Gorman Acked-by: "KOSAKI Motohiro" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e1c8afc002c0..648e1e25979e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -233,6 +233,8 @@ static inline unsigned long huge_page_size(struct hstate *h) return (unsigned long)PAGE_SIZE << h->order; } +extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); + static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -273,6 +275,7 @@ struct hstate {}; #define hstate_inode(i) NULL #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK +#define vma_kernel_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT static inline unsigned int pages_per_huge_page(struct hstate *h) -- cgit v1.2.3 From 3340289ddf29ca75c3acfb3a6b72f234b2f74d5c Mon Sep 17 00:00:00 2001 From: Mel 
Gorman Date: Tue, 6 Jan 2009 14:38:54 -0800 Subject: mm: report the MMU pagesize in /proc/pid/smaps The KernelPageSize entry in /proc/pid/smaps is the pagesize used by the kernel to back a VMA. This matches the size used by the MMU in the majority of cases. However, one counter-example occurs on PPC64 kernels whereby a kernel using 64K as a base pagesize may still use 4K pages for the MMU on older processors. To distinguish, this patch reports MMUPageSize as the pagesize used by the MMU in /proc/pid/smaps. Signed-off-by: Mel Gorman Cc: "KOSAKI Motohiro" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 648e1e25979e..f1d2fba19ea0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -235,6 +235,8 @@ static inline unsigned long huge_page_size(struct hstate *h) extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); +extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); + static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -276,6 +278,7 @@ struct hstate {}; #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK #define vma_kernel_pagesize(v) PAGE_SIZE +#define vma_mmu_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT static inline unsigned int pages_per_huge_page(struct hstate *h) -- cgit v1.2.3 From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:38:59 -0800 Subject: mm: invoke oom-killer from page fault Rather than have the pagefault handler kill a process directly if it gets a VM_FAULT_OOM, have it call into the OOM killer.
With increasingly sophisticated oom behaviour (cpusets, memory cgroups, oom killing throttling, oom priority adjustment or selective disabling, panic on oom, etc), it's silly to unconditionally kill the faulting process at page fault time. Create a hook for pagefault oom path to call into instead. Only converted x86 and uml so far. [akpm@linux-foundation.org: make __out_of_memory() static] [akpm@linux-foundation.org: fix comment] Signed-off-by: Nick Piggin Cc: Jeff Dike Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index aaa8b843be28..4a3d28c86443 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +/* + * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. + */ +extern void pagefault_out_of_memory(void); + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) extern void show_free_areas(void); -- cgit v1.2.3 From 75aa199410359dc5fbcf9025ff7af98a9d20f0d5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:01 -0800 Subject: oom: print triggering task's cpuset and mems allowed When cpusets are enabled, it's necessary to print the triggering task's set of allowable nodes so the subsequently printed meminfo can be interpreted correctly. We also print the task's cpuset name for informational purposes. 
[rientjes@google.com: task lock current before dereferencing cpuset] Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8e540d32c9fe..51ea2bdea0f9 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -78,6 +78,8 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); +extern void cpuset_print_task_mems_allowed(struct task_struct *p); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -159,6 +161,10 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ -- cgit v1.2.3 From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. 
Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. 
Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 6 +++--- include/linux/memory_hotplug.h | 2 +- include/linux/node.h | 13 +++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 36c82c9e6ea7..3fdc10806d31 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern int register_new_memory(struct mem_section *); +extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Tue, 6 Jan 2009 14:39:15 -0800 Subject: mm: get rid of pagevec_release_nonlru() speculative page references patch (commit: e286781d5f2e9c846e012a39653a166e9d31777d) removed last pagevec_release_nonlru() caller. So this function can be removed now. This patch doesn't have any functional change. 
Signed-off-by: KOSAKI Motohiro Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index e90a2cb02915..7b2886fa7fdc 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -21,7 +21,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void __pagevec_release_nonlru(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); @@ -69,12 +68,6 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } -static inline void pagevec_release_nonlru(struct pagevec *pvec) -{ - if (pagevec_count(pvec)) - __pagevec_release_nonlru(pvec); -} - static inline void pagevec_free(struct pagevec *pvec) { if (pagevec_count(pvec)) -- cgit v1.2.3 From 64cdd548ffe26849d4cd113ac640f60606063b14 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 6 Jan 2009 14:39:16 -0800 Subject: mm: cleanup: remove #ifdef CONFIG_MIGRATION #ifdefs in *.c files decrease source readability a bit; removing them is better. This patch doesn't have any functional change.
Signed-off-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Mel Gorman Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3f34005068d4..527602cdea1c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,6 +7,8 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION +#define PAGE_MIGRATION 1 + extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); @@ -20,6 +22,8 @@ extern int migrate_vmas(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #else +#define PAGE_MIGRATION 0 + static inline int putback_lru_pages(struct list_head *l) { return 0; } static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private) { return -ENOSYS; } -- cgit v1.2.3 From e5991371ee0d1c0ce19e133c6f9075b49c5b4ae8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:22 -0800 Subject: mm: remove cgroup_mm_owner_callbacks cgroup_mm_owner_callbacks() was brought in to support the memrlimit controller, but sneaked into mainline ahead of it. That controller has now been shelved, and the mm_owner_changed() args were inadequate for it anyway (they needed an mm pointer instead of a task pointer). Remove the dead code, and restore mm_update_next_owner() locking to how it was before: taking mmap_sem there does nothing for memcontrol.c, now the only user of mm->owner. 
Signed-off-by: Hugh Dickins Cc: Paul Menage Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1164963c3a85..08b78c09b09a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -329,13 +329,7 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); - /* - * This routine is called with the task_lock of mm->owner held - */ - void (*mm_owner_changed)(struct cgroup_subsys *ss, - struct cgroup *old, - struct cgroup *new, - struct task_struct *p); + int subsys_id; int active; int disabled; @@ -400,9 +394,6 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new); - #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -420,9 +411,6 @@ static inline int cgroupstats_build(struct cgroupstats *stats, return -EINVAL; } -static inline void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new) {} - #endif /* !CONFIG_CGROUPS */ #endif /* _LINUX_CGROUP_H */ -- cgit v1.2.3 From 3c1d43787b48c798f44dc32a6e6deb5ca2da3e68 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:23 -0800 Subject: mm: remove GFP_HIGHUSER_PAGECACHE GFP_HIGHUSER_PAGECACHE is just an alias for GFP_HIGHUSER_MOVABLE, making that harder to track down: remove it, and its out-of-work brothers GFP_NOFS_PAGECACHE and GFP_USER_PAGECACHE. Since we're making that improvement to hotremove_migrate_alloc(), I think we can now also remove one of the "o"s from its comment. 
Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e8003afeffba..dd20cd78faa8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -69,12 +69,6 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) -#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) -#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_MOVABLE) -#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_HIGHMEM | \ - __GFP_MOVABLE) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) -- cgit v1.2.3 From 6d91add09f4bad5f4d4233b13faa392f0c4b16be Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:24 -0800 Subject: mm: add Set,ClearPageSwapCache stubs If we add NOOP stubs for SetPageSwapCache() and ClearPageSwapCache(), then we can remove the #ifdef CONFIG_SWAPs from mm/migrate.c. 
Signed-off-by: Hugh Dickins Acked-by: Christoph Lameter Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b12f93a3c345..628ec0802492 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,6 +228,7 @@ PAGEFLAG_FALSE(HighMem) PAGEFLAG(SwapCache, swapcache) #else PAGEFLAG_FALSE(SwapCache) + SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif #ifdef CONFIG_UNEVICTABLE_LRU -- cgit v1.2.3 From b5934c531849ff4a51ce0f290141efe564290e40 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:25 -0800 Subject: mm: add_active_or_unevictable into rmap lru_cache_add_active_or_unevictable() and page_add_new_anon_rmap() always appear together. Save some symbol table space and some jumping around by removing lru_cache_add_active_or_unevictable(), folding its code into page_add_new_anon_rmap(): like how we add file pages to lru just after adding them to page cache. Remove the nearby "TODO: is this safe?" comments (yes, it is safe), and change page_add_new_anon_rmap()'s address BUG_ON to VM_BUG_ON as originally intended. 
Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a3af95b2cb6d..48f309dc5a0c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -174,8 +174,6 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_cache_add_active_or_unevictable(struct page *, - struct vm_area_struct *); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); -- cgit v1.2.3 From 2afd1c928f1132b8d0099866e75ce8ad713a1180 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:26 -0800 Subject: mm: make page_lock_anon_vma() static page_lock_anon_vma() and page_unlock_anon_vma() were made available to show_page_path() in vmscan.c; but now that has been removed, make them static in rmap.c again, they're better kept private if possible. 
Signed-off-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 89f0564b10c8..3593b18a07dd 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -63,9 +63,6 @@ void anon_vma_unlink(struct vm_area_struct *); void anon_vma_link(struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); -extern struct anon_vma *page_lock_anon_vma(struct page *page); -extern void page_unlock_anon_vma(struct anon_vma *anon_vma); - /* * rmap interfaces called when adding or removing pte of page */ -- cgit v1.2.3 From 364aeb2849789b51bf4b9af2ddd02fee7285c54e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:29 -0800 Subject: mm: change dirty limit type specifiers to unsigned long The background dirty and dirty limits are better defined with type specifiers of unsigned long since negative writeback thresholds are not possible. These values, as returned by get_dirty_limits(), are normally compared with ZVC values to determine whether writeback shall commence or be throttled. Such page counts cannot be negative, so declaring the page limits as signed is unnecessary. 
Acked-by: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: David Rientjes Cc: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e585657e9831..259e9ea58cab 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -125,8 +125,8 @@ struct file; int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, - struct backing_dev_info *bdi); +void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, + unsigned long *pbdi_dirty, struct backing_dev_info *bdi); void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, -- cgit v1.2.3 From 2da02997e08d3efe8174c7a47696e6f7cbe69ba9 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jan 2009 14:39:31 -0800 Subject: mm: add dirty_background_bytes and dirty_bytes sysctls This change introduces two new sysctls to /proc/sys/vm: dirty_background_bytes and dirty_bytes. dirty_background_bytes is the counterpart to dirty_background_ratio and dirty_bytes is the counterpart to dirty_ratio. With growing memory capacities of individual machines, it's no longer sufficient to specify dirty thresholds as a percentage of the amount of dirtyable memory over the entire system. dirty_background_bytes and dirty_bytes specify quantities of memory, in bytes, that represent the dirty limits for the entire system. If either of these values is set, its value represents the amount of dirty memory that is needed to commence either background or direct writeback. When a `bytes' or `ratio' file is written, its counterpart becomes a function of the written value. 
For example, if dirty_bytes is written to be 8096, 8K of memory is required to commence direct writeback. dirty_ratio is then functionally equivalent to 8K / the amount of dirtyable memory: dirtyable_memory = free pages + mapped pages + file cache dirty_background_bytes = dirty_background_ratio * dirtyable_memory -or- dirty_background_ratio = dirty_background_bytes / dirtyable_memory AND dirty_bytes = dirty_ratio * dirtyable_memory -or- dirty_ratio = dirty_bytes / dirtyable_memory Only one of dirty_background_bytes and dirty_background_ratio may be specified at a time, and only one of dirty_bytes and dirty_ratio may be specified. When one sysctl is written, the other appears as 0 when read. The `bytes' files operate on a page size granularity since dirty limits are compared with ZVC values, which are in page units. Prior to this change, the minimum dirty_ratio was 5 as implemented by get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user written value between 0 and 100. This restriction is maintained, but dirty_bytes has a lower limit of only one page. Also prior to this change, the dirty_background_ratio could not equal or exceed dirty_ratio. This restriction is maintained in addition to restricting dirty_background_bytes. If either background threshold equals or exceeds that of the dirty threshold, it is implicitly set to half the dirty threshold. Acked-by: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: David Rientjes Cc: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 259e9ea58cab..bb28c975c1d7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. 
*/ extern int dirty_background_ratio; +extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; +extern unsigned long vm_dirty_bytes; extern int dirty_writeback_interval; extern int dirty_expire_interval; extern int vm_highmem_is_dirtyable; @@ -116,9 +118,18 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); +extern int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); +extern int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); struct ctl_table; struct file; -- cgit v1.2.3 From 7b1fe59793e61f826bef053107b57b23954833bb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:34 -0800 Subject: mm: reuse_swap_page replaces can_share_swap_page A good place to free up old swap is where do_wp_page(), or do_swap_page(), is about to redirty the page: the data on disk is then stale and won't be read again; and if we do decide to write the page out later, using the previous swap location makes an unnecessary disk seek very likely. So give can_share_swap_page() the side-effect of delete_from_swap_cache() when it safely can. And can_share_swap_page() was always a misleading name, the more so if it has a side-effect: rename it reuse_swap_page(). Irrelevant cleanup nearby: remove swap_token_default_timeout definition from swap.h: it's used nowhere. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 48f309dc5a0c..366556c5b148 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -304,7 +304,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); -extern int can_share_swap_page(struct page *); +extern int reuse_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); extern int remove_exclusive_swap_page_ref(struct page *); struct backing_dev_info; @@ -372,8 +372,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -#define can_share_swap_page(p) (page_mapcount(p) == 1) - static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { @@ -388,7 +386,7 @@ static inline void delete_from_swap_cache(struct page *page) { } -#define swap_token_default_timeout 0 +#define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int remove_exclusive_swap_page(struct page *p) { -- cgit v1.2.3 From a2c43eed8334e878702fca713b212ae2a11d84b9 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:36 -0800 Subject: mm: try_to_free_swap replaces remove_exclusive_swap_page remove_exclusive_swap_page(): its problem is in living up to its name. 
It doesn't matter if someone else has a reference to the page (raised page_count); it doesn't matter if the page is mapped into userspace (raised page_mapcount - though that hints it may be worth keeping the swap): all that matters is that there be no more references to the swap (and no writeback in progress). swapoff (try_to_unuse) has been removing pages from swapcache for years, with no concern for page count or page mapcount, and we used to have a comment in lookup_swap_cache() recognizing that: if you go for a page of swapcache, you'll get the right page, but it could have been removed from swapcache by the time you get page lock. So, give up asking for exclusivity: get rid of remove_exclusive_swap_page(), and remove_exclusive_swap_page_ref() and remove_exclusive_swap_page_count() which were spawned for the recent LRU work: replace them by the simpler try_to_free_swap() which just checks page_swapcount(). Similarly, remove the page_count limitation from free_swap_and_count(), but assume that it's worth holding on to the swap if page is mapped and swap nowhere near full. Add a vm_swap_full() test in free_swap_cache()? It would be consistent, but I think we probably have enough for now. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 366556c5b148..c3ecd478840e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -305,8 +305,7 @@ extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int reuse_swap_page(struct page *); -extern int remove_exclusive_swap_page(struct page *); -extern int remove_exclusive_swap_page_ref(struct page *); +extern int try_to_free_swap(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ @@ -388,12 +387,7 @@ static inline void delete_from_swap_cache(struct page *page) #define reuse_swap_page(page) (page_mapcount(page) == 1) -static inline int remove_exclusive_swap_page(struct page *p) -{ - return 0; -} - -static inline int remove_exclusive_swap_page_ref(struct page *page) +static inline int try_to_free_swap(struct page *page) { return 0; } -- cgit v1.2.3 From ac47b003d03c2a4f28aef1d505b66d24ad191c4f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:39 -0800 Subject: mm: remove gfp_mask from add_to_swap Remove gfp_mask argument from add_to_swap(): it's misleading because its only caller, shrink_page_list(), is not atomic at that point; and in due course (implementing discard) we'll sometimes want to allocate some memory with GFP_NOIO (as is used in swap_writepage) when allocating swap. No change to the gfp_mask passed down to add_to_swap_cache(): still use __GFP_HIGH without __GFP_WAIT (with nomemalloc and nowarn as before): though it's not obvious if that's the best combination to ask for here. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c3ecd478840e..c38bd157695b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -278,7 +278,7 @@ extern void end_swap_bio_read(struct bio *bio, int err); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, gfp_t); +extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); -- cgit v1.2.3 From 60371d971a3d01afd102f0bbf2681f32ecc31d78 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:40 -0800 Subject: mm: add add_to_swap stub If we add a failing stub for add_to_swap(), then we can remove the #ifdef CONFIG_SWAP from mm/vmscan.c. This was intended as a source cleanup, but looking more closely, it turns out that the !CONFIG_SWAP case was going to keep_locked for an anonymous page, whereas now it goes to the more suitable activate_locked, like the CONFIG_SWAP nr_swap_pages 0 case. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c38bd157695b..c0d23ac710d5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -371,6 +371,11 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } +static inline int add_to_swap(struct page *page) +{ + return 0; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { -- cgit v1.2.3 From b962716b459505a8d83aea313fea0abe76749f42 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:41 -0800 Subject: mm: optimize get_scan_ratio for no swap Rik suggests a simplified get_scan_ratio() for !CONFIG_SWAP. Yes, the gcc optimizer gives us that, when nr_swap_pages is #defined as 0L. Move usual declaration to swapfile.c: it never belonged in page_alloc.c. 
Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c0d23ac710d5..3a31cc25bd2c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -163,7 +163,6 @@ struct swap_list_t { /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern long nr_swap_pages; extern unsigned int nr_free_buffer_pages(void); extern unsigned int nr_free_pagecache_pages(void); @@ -291,6 +290,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); /* linux/mm/swapfile.c */ +extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); @@ -331,7 +331,8 @@ static inline void disable_swap_token(void) #else /* CONFIG_SWAP */ -#define total_swap_pages 0 +#define nr_swap_pages 0L +#define total_swap_pages 0L #define total_swapcache_pages 0UL #define si_swapinfo(val) \ -- cgit v1.2.3 From 69beeb1d3428424fbc7546f85e5cd7ac4119c09d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 6 Jan 2009 14:39:46 -0800 Subject: mm: make vread() and vwrite() declaration Sparse outputs the following warnings. mm/vmalloc.c:1436:6: warning: symbol 'vread' was not declared. Should it be static? mm/vmalloc.c:1474:6: warning: symbol 'vwrite' was not declared. Should it be static? However, they are used by /dev/kmem. Fixed here.
Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 307b88577eaa..506e7620a986 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -97,6 +97,10 @@ extern void unmap_kernel_range(unsigned long addr, unsigned long size); extern struct vm_struct *alloc_vm_area(size_t size); extern void free_vm_area(struct vm_struct *area); +/* for /dev/kmem */ +extern long vread(char *buf, char *addr, unsigned long count); +extern long vwrite(char *buf, char *addr, unsigned long count); + /* * Internals. Dont't use.. */ -- cgit v1.2.3 From 22c6f8fdb31993cf49bdd4a47b64a7002391e1c7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:48 -0800 Subject: swapfile: remove SWP_ACTIVE mask Remove the SWP_ACTIVE mask: it just obscures the SWP_WRITEOK flag. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a31cc25bd2c..410c8e473727 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,7 +120,6 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.3 From ebebbbe904634b0ca1c674457b399f68db5e05b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:50 -0800 Subject: swapfile: rearrange scan and swap_info Before making functional changes, rearrange scan_swap_map() to simplify subsequent diffs. 
Actually, there is one functional change in there: leave cluster_nr negative while scanning for a new cluster - resetting it early increased the likelihood that when we have difficulty finding a free cluster, another task may come in and try doing exactly the same - just a waste of cpu. Before making functional changes, rearrange struct swap_info_struct slightly: flags will be needed as an unsigned long (for wait_on_bit), next is a good int to pair with prio, old_block_size is uninteresting so shift it to the end. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 410c8e473727..9cabb8b21aba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -133,14 +133,14 @@ enum { * The in-memory structure used to track swap areas. */ struct swap_info_struct { - unsigned int flags; + unsigned long flags; int prio; /* swap priority */ + int next; /* next entry on swap list */ struct file *swap_file; struct block_device *bdev; struct list_head extent_list; struct swap_extent *curr_swap_extent; - unsigned old_block_size; - unsigned short * swap_map; + unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; unsigned int cluster_next; @@ -148,7 +148,7 @@ struct swap_info_struct { unsigned int pages; unsigned int max; unsigned int inuse_pages; - int next; /* next entry on swap list */ + unsigned int old_block_size; }; struct swap_list_t { -- cgit v1.2.3 From 6a6ba83175c029c7820765bae44692266b29e67a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:51 -0800 Subject: swapfile: swapon use discard (trim) When adding swap, all the old data on swap can be forgotten: sys_swapon() discards all but the header page of the swap partition (or every extent but the header of the swap file), to give
a solidstate swap device the opportunity to optimize its wear-levelling. If that succeeds, note SWP_DISCARDABLE for later use, and report it with a "D" at the right end of the kernel's "Adding ... swap" message. Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 9cabb8b21aba..0b9210ea96c7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,6 +120,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ + SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.3 From 7992fde72ce06c73280a1939b7a1e903bc95ef85 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:53 -0800 Subject: swapfile: swap allocation use discard When scan_swap_map() finds a free cluster of swap pages to allocate, discard the old contents of the cluster if the device supports discard. But don't bother when swap is so fragmented that we allocate single pages. Be careful about racing allocations made while we're scanning for a cluster; and hold up allocations made while we're discarding. 
Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b9210ea96c7..fe79f44c858e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -121,6 +121,7 @@ enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; @@ -144,6 +145,8 @@ struct swap_info_struct { unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; + unsigned int lowest_alloc; /* while preparing discard cluster */ + unsigned int highest_alloc; /* while preparing discard cluster */ unsigned int cluster_next; unsigned int cluster_nr; unsigned int pages; -- cgit v1.2.3 From 20137a490f397d9c01fc9fadd83a8d198bda4477 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:54 -0800 Subject: swapfile: swapon randomize if nonrot Swap allocation has always started from the beginning of the swap area; but if we're dealing with a solidstate swap device which can only remap blocks within limited zones, that would sooner wear out the first zone. Therefore sys_swapon() tests whether blk_queue is non-rotational, and if so randomize the cluster_next starting position for allocation. If blk_queue is nonrot, note SWP_SOLIDSTATE for later use, and report it with an "SS" at the right end of the kernel's "Adding ... swap" message (so that if it's both nonrot and discardable, "SSD" will be shown there).
Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index fe79f44c858e..cbf7fbed3dfd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -122,6 +122,7 @@ enum { SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ + SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit v1.2.3 From 79f4b7bf393e67bbffec807cc68caaefc72b82ee Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:05 -0800 Subject: badpage: simplify page_alloc flag check+clear Simplify the PAGE_FLAGS checking and clearing when freeing and allocating a page: check the same flags as before when freeing, clear ALL the flags (unless PageReserved) when freeing, check ALL flags off when allocating. 
Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 628ec0802492..219a523ecdb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -373,31 +373,22 @@ static inline void __ClearPageTail(struct page *page) #define __PG_MLOCKED 0 #endif -#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ - 1 << PG_buddy | 1 << PG_writeback | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE | __PG_MLOCKED) - -/* - * Flags checked in bad_page(). Pages on the free list should not have - * these flags set. It they are, there is a problem. - */ -#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \ - 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked) - /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. It they are, there is a problem. */ -#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved) +#define PAGE_FLAGS_CHECK_AT_FREE \ + (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + __PG_UNEVICTABLE | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have these flags set. It they are, there - * is a problem. + * Pages being prepped should not have any flags set. It they are set, + * there has been a kernel bug or struct page corruption. 
*/ -#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \ - 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked) +#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ -- cgit v1.2.3 From 2509ef26db4699a5d9fa876e90ddfc107afcab84 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:10 -0800 Subject: badpage: zap print_bad_pte on swap and file Complete zap_pte_range()'s coverage of bad pagetable entries by calling print_bad_pte() on a pte_file in a linear vma and on a bad swap entry. That needs free_swap_and_cache() to tell it, which will also have shown one of those "swap_free" errors (but with much less information). Similar checks in fork's copy_one_pte()? No, that would be more noisy than helpful: we'll see them when parent and child exec or exit. Where do_nonlinear_fault() calls print_bad_pte(): omit !VM_CAN_NONLINEAR case, that could only be a bug in sys_remap_file_pages(), not a bad pte. VM_FAULT_OOM rather than VM_FAULT_SIGBUS? Well, okay, that is consistent with what happens if do_swap_page() operates a bad swap entry; but don't we have patches to be more careful about killing when VM_FAULT_OOM? 
Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index cbf7fbed3dfd..91dee50fe260 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -302,7 +302,7 @@ extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); -extern void free_swap_and_cache(swp_entry_t); +extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); @@ -352,14 +352,8 @@ static inline void show_swap_cache_info(void) { } -static inline void free_swap_and_cache(swp_entry_t swp) -{ -} - -static inline int swap_duplicate(swp_entry_t swp) -{ - return 0; -} +#define free_swap_and_cache(swp) is_migration_entry(swp) +#define swap_duplicate(swp) is_migration_entry(swp) static inline void swap_free(swp_entry_t swp) { -- cgit v1.2.3 From edc315fd222497ae4f4b959a9e31ada1e68a4755 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:11 -0800 Subject: badpage: remove vma from page_remove_rmap Remove page_remove_rmap()'s vma arg, which was only for the Eeek message. And remove the BUG_ON(page_mapcount(page) == 0) from CONFIG_DEBUG_VM's page_dup_rmap(): we're trying to be more resilient about that than BUGs. 
Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3593b18a07dd..b35bc0e19cd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -69,7 +69,7 @@ void __anon_vma_link(struct vm_area_struct *); void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_file_rmap(struct page *); -void page_remove_rmap(struct page *, struct vm_area_struct *); +void page_remove_rmap(struct page *); #ifdef CONFIG_DEBUG_VM void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.3 From 4f5a99d64c17470a784a6c68064207d82e3e74a5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:25 -0800 Subject: fs: remove WB_SYNC_HOLD Remove WB_SYNC_HOLD. The primary motivation is the design of my anti-starvation code for fsync. It requires taking an inode lock over the sync operation, so we could run into lock ordering problems with multiple inodes. It is possible to take a single global lock to solve the ordering problem, but then that would prevent a future nice implementation of "sync multiple inodes" based on lock order via inode address. Seems like a backward step to remove this, but actually it is busted anyway: we can't use the inode lists for data integrity wait: an inode can be taken off the dirty lists but still be under writeback. In order to satisfy data integrity semantics, we should wait for it to finish writeback, but if we only search the dirty lists, we'll miss it. It would be possible to have a "writeback" list, for sys_sync, I suppose. But why complicate things by prematurely optimising?
For unmounting, we could avoid the "livelock avoidance" code, which would be easier, but again premature IMO. Fixing the existing data integrity problem will come next. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index bb28c975c1d7..7300ecdc480c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -30,7 +30,6 @@ static inline int task_is_pdflush(struct task_struct *task) enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ - WB_SYNC_HOLD, /* Hold the inode on sb_dirty for sys_sync() */ }; /* -- cgit v1.2.3 From 856bf4d717feb8c55d4e2f817b71ebb70cfbc67b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:26 -0800 Subject: fs: sys_sync fix s_syncing livelock avoidance was breaking data integrity guarantee of sys_sync, by allowing sys_sync to skip writing or waiting for superblocks if there is a concurrent sys_sync happening. This livelock avoidance is much less important now that we don't have the get_super_to_sync() call after every sb that we sync. This was replaced by __put_super_and_need_restart. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb59673c60b1..d7eba77f666e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1133,7 +1133,6 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_syncing; int s_need_sync_fs; atomic_t s_active; #ifdef CONFIG_SECURITY -- cgit v1.2.3 From 901608d9045146aec6f14a7777ea4b1501c379f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Jan 2009 14:40:29 -0800 Subject: mm: introduce get_mm_hiwater_xxx(), fix taskstats->hiwater_xxx accounting xacct_add_tsk() relies on do_exit()->update_hiwater_xxx() and uses mm->hiwater_xxx directly, this leads to 2 problems: - taskstats_user_cmd() can call fill_pid()->xacct_add_tsk() at any moment before the task exits, so we should check the current values of rss/vm anyway. - do_exit()->update_hiwater_xxx() calls are racy. An exiting thread can be preempted right before mm->hiwater_xxx = new_val, and another thread can use A_LOT of memory and exit in between. When the first thread resumes it can be the last thread in the thread group, in that case we report the wrong hiwater_xxx values which do not take A_LOT into account. Introduce get_mm_hiwater_rss() and get_mm_hiwater_vm() helpers and change xacct_add_tsk() to use them. The first helper will also be used by rusage->ru_maxrss accounting. Kill do_exit()->update_hiwater_xxx() calls. Unless we are going to decrease rss/vm there is no point to update mm->hiwater_xxx, and nobody can look at this mm_struct when exit_mmap() actually unmaps the memory. 
Signed-off-by: Oleg Nesterov Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..ea415136ac9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) +#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) + extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -- cgit v1.2.3 From ea435467500612636f8f4fb639ff6e76b2496e4b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 6 Jan 2009 14:40:39 -0800 Subject: atomic_t: unify all arch definitions The atomic_t type cannot currently be used in some header files because it would create an include loop with asm/atomic.h. Move the type definition to linux/types.h to break the loop. 
Signed-off-by: Matthew Wilcox Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 121f349cb7ec..3b864f2d9560 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -195,6 +195,16 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; +typedef struct { + volatile int counter; +} atomic_t; + +#ifdef CONFIG_64BIT +typedef struct { + volatile long counter; +} atomic64_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; -- cgit v1.2.3 From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Jan 2009 14:40:45 -0800 Subject: Remove remaining unwinder code Signed-off-by: Alexey Dobriyan Cc: Gabor Gombas Cc: Jan Beulich Cc: Andi Kleen Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 3 --- include/linux/unwind.h | 68 -------------------------------------------------- 2 files changed, 71 deletions(-) delete mode 100644 include/linux/unwind.h (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 3bfed013350b..03cb93d1865a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -294,9 +294,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* The handle returned from unwind_add_table. */ - void *unwind_info; - /* Arch-specific module values */ struct mod_arch_specific arch; diff --git a/include/linux/unwind.h b/include/linux/unwind.h deleted file mode 100644 index 7760860fa170..000000000000 --- a/include/linux/unwind.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _LINUX_UNWIND_H -#define _LINUX_UNWIND_H - -/* - * Copyright (C) 2002-2006 Novell, Inc. 
- * Jan Beulich - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. - */ - -struct module; - -struct unwind_frame_info {}; - -static inline void unwind_init(void) {} -static inline void unwind_setup(void) {} - -#ifdef CONFIG_MODULES - -static inline void *unwind_add_table(struct module *mod, - const void *table_start, - unsigned long table_size) -{ - return NULL; -} - -static inline void unwind_remove_table(void *handle, int init_only) -{ -} - -#endif - -static inline int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - const struct pt_regs *regs) -{ - return -ENOSYS; -} - -static inline int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - return -ENOSYS; -} - -static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*cb)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - return -ENOSYS; -} - -static inline int unwind(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -static inline int unwind_to_user(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -#endif /* _LINUX_UNWIND_H */ -- cgit v1.2.3 From 9fe06081ef145d6582c39e18203b5fffe6f3abc2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 6 Jan 2009 14:40:51 -0800 Subject: Create a DIV_ROUND_CLOSEST macro to do division with rounding Create a helper macro to divide two numbers and round the result to the nearest whole number. This is a helper macro for hwmon drivers that want to convert incoming sysfs values per standard hwmon practice, though the macro itself can be used by anyone. Signed-off-by: Darrick J. 
Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ca9ff6411dfa..721984844c94 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -48,6 +48,12 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(divisor) __divisor = divisor; \ + (((x) + ((__divisor) / 2)) / (__divisor)); \ +} \ +) #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -- cgit v1.2.3 From 5f820f648c92a5ecc771a96b3c29aa6e90013bba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2009 14:40:59 -0800 Subject: poll: allow f_op->poll to sleep f_op->poll is the only vfs operation which is not allowed to sleep. It's because poll and select implementation used task state to synchronize against wake ups, which doesn't have to be the case anymore as wait/wake interface can now use custom wake up functions. The non-sleep restriction can be a bit tricky because ->poll is not called from an atomic context and the result of accidentally sleeping in ->poll only shows up as temporary busy looping when the timing is right or rather wrong. This patch converts poll/select to use custom wake up function and use separate triggered variable to synchronize against wake up events. The only added overhead is an extra function call during wake up and negligible. This patch removes the one non-sleep exception from vfs locking rules and is beneficial to userland filesystem implementations like FUSE, 9p or peculiar fs like spufs as it's very difficult for those to implement non-sleeping poll method. 
While at it, make the following cosmetic changes to make poll.h and select.c checkpatch friendly. * s/type * symbol/type *symbol/ : three places in poll.h * remove blank line before EXPORT_SYMBOL() : two places in select.c Oleg: spotted missing barrier in poll_schedule_timeout() Davide: spotted missing write barrier in pollwake() Signed-off-by: Tejun Heo Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Ingo Molnar Cc: Christoph Hellwig Signed-off-by: Miklos Szeredi Cc: Davide Libenzi Cc: Brad Boyer Cc: Al Viro Cc: Roland McGrath Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Davide Libenzi Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index badd98ab06f6..8c24ef8d9976 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) } struct poll_table_entry { - struct file * filp; + struct file *filp; wait_queue_t wait; - wait_queue_head_t * wait_address; + wait_queue_head_t *wait_address; }; /* @@ -56,7 +56,9 @@ struct poll_table_entry { */ struct poll_wqueues { poll_table pt; - struct poll_table_page * table; + struct poll_table_page *table; + struct task_struct *polling_task; + int triggered; int error; int inline_index; struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; @@ -64,6 +66,13 @@ struct poll_wqueues { extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); +extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack); + +static inline int poll_schedule(struct poll_wqueues *pwq, int state) +{ + return poll_schedule_timeout(pwq, state, NULL, 0); +} /* * Scaleable version of the fd_set. 
-- cgit v1.2.3 From 179f7ebff6be45738c6e2fa68c8d2cc5c2c6308e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jan 2009 14:41:04 -0800 Subject: percpu_counter: FBC_BATCH should be a variable For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS Considering more and more distros are using high NR_CPUS values, it makes sense to use a more sensible value for FBC_BATCH, and get rid of NR_CPUS. A sensible value is 2*num_online_cpus(), with a minimum value of 32 (This minimum value helps branch prediction in __percpu_counter_add()) We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically. We rename FBC_BATCH to percpu_counter_batch since it's not a constant anymore. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..99de7a31bab8 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -24,11 +24,7 @@ struct percpu_counter { s32 *counters; }; -#if NR_CPUS >= 16 -#define FBC_BATCH (NR_CPUS*2) -#else -#define FBC_BATCH (NR_CPUS*4) -#endif +extern int percpu_counter_batch; int percpu_counter_init(struct percpu_counter *fbc, s64 amount); int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); @@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - __percpu_counter_add(fbc, amount, FBC_BATCH); + __percpu_counter_add(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) -- cgit v1.2.3 From 8c3659347efb43857b2c2d7bc63a9c7d68d1a608 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 6 Jan 2009 14:41:14 -0800 Subject: include/linux/interrupt.h: do not include 
linux/irqnr.h twice Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0702c4d7bdf0..af886b26c9d1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,7 +14,6 @@ #include #include #include -#include <linux/irqnr.h> #include #include -- cgit v1.2.3 From 5cf0cc4e670b8da2231a3375db87ec3b6cb84432 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 6 Jan 2009 14:41:38 -0800 Subject: binfmts.h: include list.h linux_binfmt uses list_head, so list.h is needed. [akpm@linux-foundation.org: fix `make headerscheck'] Signed-off-by: Hiroshi Shimamoto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6cbfbe297180..0d0150b4901e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,6 +18,7 @@ struct pt_regs; #define BINPRM_BUF_SIZE 128 #ifdef __KERNEL__ +#include <linux/list.h> #define CORENAME_MAX_SIZE 128 -- cgit v1.2.3 From d29389de0b0ee1715333bafc6ac3f22a75aa4313 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 6 Jan 2009 14:41:41 -0800 Subject: spi_gpio driver Generalize the old at91rm9200 "bootstrap" bitbanging SPI master driver as "spi_gpio", so it works with arbitrary GPIOs and can be configured through platform_data. Such SPI masters support: - any number of bus instances (bus_num is the platform_device.id) - any number of chipselects (one GPIO per spi_device) - all four SPI_MODE values, and SPI_CS_HIGH - i/o word sizes from 1 to 32 bits; - devices configured as with any other spi_master controller When configured using platform_data, this provides relatively low clock rates. 
On platforms that support inlined GPIO calls, significantly improved transfer speeds are also possible with a semi-custom driver. (It's still painful when accessing flash memory, but less so.) Sanity checked by using this version to replace both native controllers on a board with six different SPI slaves, relying on three different SPI_MODE_* values and both SPI_CS_HIGH settings for correct operation. [akpm@linux-foundation.org: cleanups] Signed-off-by: David Brownell Acked-by: Magnus Damm Tested-by: Magnus Damm Cc: Torgil Svensson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_gpio.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/linux/spi/spi_gpio.h (limited to 'include/linux') diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h new file mode 100644 index 000000000000..0f01a0f1f40c --- /dev/null +++ b/include/linux/spi/spi_gpio.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_SPI_GPIO_H +#define __LINUX_SPI_GPIO_H + +/* + * For each bitbanged SPI bus, set up a platform_device node with: + * - name "spi_gpio" + * - id the same as the SPI bus number it implements + * - dev.platform data pointing to a struct spi_gpio_platform_data + * + * Or, see the driver code for information about speedups that are + * possible on platforms that support inlined access for GPIOs (no + * spi_gpio_platform_data is used). + * + * Use spi_board_info with these busses in the usual way, being sure + * that the controller_data being the GPIO used for each device's + * chipselect: + * + * static struct spi_board_info ... [] = { + * ... + * // this slave uses GPIO 42 for its chipselect + * .controller_data = (void *) 42, + * ... + * // this one uses GPIO 86 for its chipselect + * .controller_data = (void *) 86, + * ... + * }; + * + * If the bitbanged bus is later switched to a "native" controller, + * that platform_device and controller_data should be removed. 
+ */ + +/** + * struct spi_gpio_platform_data - parameter for bitbanged SPI master + * @sck: number of the GPIO used for clock output + * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data + * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data + * @num_chipselect: how many slaves to allow + * + * All GPIO signals used with the SPI bus managed through this driver + * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead + * of some alternate function. + * + * It can be convenient to use this driver with pins that have alternate + * functions associated with a "native" SPI controller if a driver for that + * controller is not available, or is missing important functionality. + * + * On platforms which can do so, configure MISO with a weak pullup unless + * there's an external pullup on that signal. That saves power by avoiding + * floating signals. (A weak pulldown would save power too, but many + * drivers expect to see all-ones data as the no slave "response".) + */ +struct spi_gpio_platform_data { + unsigned sck; + unsigned mosi; + unsigned miso; + + u16 num_chipselect; +}; + +#endif /* __LINUX_SPI_GPIO_H */ -- cgit v1.2.3 From a06f6211ef9b1785922f9d0e8766d63ac4e66de1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:49 -0800 Subject: module: add within_module_core() and within_module_init() This series of patches allows kprobes to probe module's __init and __exit functions. This means, you can probe driver initialization and terminating. Currently, kprobes can't probe __init function because these functions are freed after module initialization. And it also can't probe module __exit functions because kprobe increments reference count of target module and user can't unload it. this means __exit functions never be called unless removing probes from the module. 
To solve both cases, this series of patches introduces GONE flag and sets it when the target code is freed(for this purpose, kprobes hooks MODULE_STATE_* events). This also removes refcount incrementing for allowing user to unload target module. Users can check which probes are GONE by debugfs interface. For taking timing of freeing module's .init text, these also include a patch which adds module's notifier of MODULE_STATE_LIVE event. This patch: Add within_module_core() and within_module_init() for checking whether an address is in the module .init.text section or .text section, and replace within() local inline functions in kernel/module.c with them. kprobes uses these functions to check where the kprobe is inserted. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 03cb93d1865a..4f7ea12463d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,6 +365,18 @@ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); int is_module_address(unsigned long addr); +static inline int within_module_core(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_core <= addr && + addr < (unsigned long)mod->module_core + mod->core_size; +} + +static inline int within_module_init(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_init <= addr && + addr < (unsigned long)mod->module_init + mod->init_size; +} + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. 
*/ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, -- cgit v1.2.3 From 129415607845d4daea11ddcba706005c69dcb942 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:50 -0800 Subject: kprobes: add kprobe_insn_mutex and cleanup arch_remove_kprobe() Add kprobe_insn_mutex for protecting kprobe_insn_pages hlist, and remove kprobe_mutex from architecture dependent code. This allows us to call arch_remove_kprobe() (and free_insn_slot) while holding kprobe_mutex. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Russell King Cc: "Luck, Tony" Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 497b1d1f7a05..b93e44ce2284 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -201,7 +201,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); -- cgit v1.2.3 From e8386a0cb22f4a2d439384212c494ad0bda848fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:52 -0800 Subject: kprobes: support probing module __exit function Allows kprobes to probe __exit routines. This adds a flags member to struct kprobe. When a module is freed (kprobes hooks module_notifier to get this event), the kprobes which probe functions in that module get the "Gone" flag set in their flags member. These "Gone" probes will never be enabled. Users can check the GONE flag through debugfs. 
This also removes mod_refcounted, because we couldn't free a module if kprobe incremented the refcount of that module. [akpm@linux-foundation.org: document some locking] [mhiramat@redhat.com: bugfix: pass aggr_kprobe to arch_remove_kprobe] [mhiramat@redhat.com: bugfix: release old_p's insn_slot before error return] Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b93e44ce2284..d6ea19e314bb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -69,9 +69,6 @@ struct kprobe { /* list of kprobes for multi-handler support */ struct list_head list; - /* Indicates that the corresponding module has been ref counted */ - unsigned int mod_refcounted; - /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; @@ -103,8 +100,19 @@ struct kprobe { /* copy of the original instruction */ struct arch_specific_insn ainsn; + + /* Indicates various status flags. Protected by kprobe_mutex. */ + u32 flags; }; +/* Kprobe status flags */ +#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ + +static inline int kprobe_gone(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_GONE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding -- cgit v1.2.3 From 730c9eeca9808fc2cfb506cc68c90aa330da17b0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 6 Jan 2009 14:42:06 -0800 Subject: autofs4: improve parameter usage The parameter usage in the device node ioctl code uses arg1 and arg2 as parameter names. 
This patch redefines the parameter names to reflect what they actually are in an effort to make the code more readable. Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 75 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index f4d05ccd731f..91a773993a5c 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,6 +10,7 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include #include #define AUTOFS_DEVICE_NAME "autofs" @@ -25,6 +26,60 @@ * An ioctl interface for autofs mount point control. */ +struct args_protover { + __u32 version; +}; + +struct args_protosubver { + __u32 sub_version; +}; + +struct args_openmount { + __u32 devid; +}; + +struct args_ready { + __u32 token; +}; + +struct args_fail { + __u32 token; + __s32 status; +}; + +struct args_setpipefd { + __s32 pipefd; +}; + +struct args_timeout { + __u64 timeout; +}; + +struct args_requester { + __u32 uid; + __u32 gid; +}; + +struct args_expire { + __u32 how; +}; + +struct args_askumount { + __u32 may_umount; +}; + +struct args_ismountpoint { + union { + struct args_in { + __u32 type; + } in; + struct args_out { + __u32 devid; + __u32 magic; + } out; + }; +}; + /* * All the ioctls use this structure. 
* When sending a path size must account for the total length @@ -39,20 +94,32 @@ struct autofs_dev_ioctl { * including this struct */ __s32 ioctlfd; /* automount command fd */ - __u32 arg1; /* Command parameters */ - __u32 arg2; + /* Command parameters */ + + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; char path[0]; }; static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) { + memset(in, 0, sizeof(struct autofs_dev_ioctl)); in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; in->size = sizeof(struct autofs_dev_ioctl); in->ioctlfd = -1; - in->arg1 = 0; - in->arg2 = 0; return; } -- cgit v1.2.3 From a92daf6ba1f9ace8584edc8eb557a77aa7c2c71d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 6 Jan 2009 14:42:08 -0800 Subject: autofs4: make autofs type usage explicit - the type assigned at mount when no type is given is changed from 0 to AUTOFS_TYPE_INDIRECT. This was done because 0 and AUTOFS_TYPE_INDIRECT were being treated implicitly as the same type. - previously, an offset mount had it's type set to AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET but the mount control re-implementation needs to be able distinguish all three types. So this was changed to make the type setting explicit. - a type AUTOFS_TYPE_ANY was added for use by the re-implementation when checking if a given path is a mountpoint. It's not really a type as we use this to ask if a given path is a mountpoint in the autofs_dev_ioctl_ismountpoint() function. - functions to set and test the autofs mount types have been added to improve readability and make the type usage explicit. 
- the mount type is used from user space for the mount control re-implementation so, for consistency, all the definitions have been moved to the user space include file include/linux/auto_fs4.h. Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 2253716d4b92..55fa478bd639 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -29,10 +29,64 @@ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -#define AUTOFS_TYPE_ANY 0x0000 -#define AUTOFS_TYPE_INDIRECT 0x0001 -#define AUTOFS_TYPE_DIRECT 0x0002 -#define AUTOFS_TYPE_OFFSET 0x0004 +#define AUTOFS_TYPE_ANY 0U +#define AUTOFS_TYPE_INDIRECT 1U +#define AUTOFS_TYPE_DIRECT 2U +#define AUTOFS_TYPE_OFFSET 4U + +static inline void set_autofs_type_indirect(unsigned int *type) +{ + *type = AUTOFS_TYPE_INDIRECT; + return; +} + +static inline unsigned int autofs_type_indirect(unsigned int type) +{ + return (type == AUTOFS_TYPE_INDIRECT); +} + +static inline void set_autofs_type_direct(unsigned int *type) +{ + *type = AUTOFS_TYPE_DIRECT; + return; +} + +static inline unsigned int autofs_type_direct(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT); +} + +static inline void set_autofs_type_offset(unsigned int *type) +{ + *type = AUTOFS_TYPE_OFFSET; + return; +} + +static inline unsigned int autofs_type_offset(unsigned int type) +{ + return (type == AUTOFS_TYPE_OFFSET); +} + +static inline unsigned int autofs_type_trigger(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); +} + +/* + * This isn't really a type as we use it to say "no type set" to + * indicate we want to search for "any" mount in the + * autofs_dev_ioctl_ismountpoint() device ioctl function. 
+ */ +static inline void set_autofs_type_any(unsigned int *type) +{ + *type = AUTOFS_TYPE_ANY; + return; +} + +static inline unsigned int autofs_type_any(unsigned int type) +{ + return (type == AUTOFS_TYPE_ANY); +} /* Daemon notification packet types */ enum autofs_notify { -- cgit v1.2.3 From cabb3fc4bd1628c37c37e054960eb3e4bf30dc26 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 6 Jan 2009 14:42:26 -0800 Subject: twl4030-gpio: cleanup debounce Provide a static debounce configuration mechanism for twl4030 GPIOs, replacing the previous dynamic one. The single user of that mechanism was for MMC card detect debouncing. Boards can provide a bitmask saying which GPIOs to debounce (30 msec). It's always enabled for pins with the MMC card-detect/VMMCx link active, so most boards won't need to set the debounce mask. This is a net code shrink, including runtime footprint. Signed-off-by: David Brownell Signed-off-by: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/twl4030.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index a8f84c01f82e..8137f660a5cc 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -234,6 +234,9 @@ struct twl4030_gpio_platform_data { /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ u8 mmc_cd; + /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */ + u32 debounce; + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup * should be enabled. Else, if that bit is set in "pulldowns", * that pulldown is enabled. Don't waste power by letting any @@ -307,12 +310,6 @@ int twl4030_sih_setup(int module); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -/* - * Exported TWL4030 GPIO APIs - * - * WARNING -- use standard GPIO and IRQ calls instead; these will vanish. 
- */ -int twl4030_set_gpio_debounce(int gpio, int enable); #if defined(CONFIG_TWL4030_BCI_BATTERY) || \ defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) -- cgit v1.2.3 From d3635abfee0c55ad9dcd6b6172a0c6a5455900c9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:41 -0800 Subject: rapidio: remove excess kernel-doc notation Remove excess kernel-doc notation from rio header and driver: Warning(include/linux/rio_drv.h:399): Excess function parameter or struct member 'buffer' description in 'rio_get_inb_message' Signed-off-by: Randy Dunlap Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 32c0547ffafc..c93a58a40033 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,7 +391,6 @@ static inline int rio_add_inb_buffer(struct rio_mport *mport, int mbox, * rio_get_inb_message - Get A RIO message from an inbound mailbox queue * @mport: Master port containing the inbound mailbox * @mbox: The inbound mailbox number - * @buffer: Pointer to the message buffer * * Get a RIO message from an inbound mailbox queue. Returns 0 on success. */ -- cgit v1.2.3 From 8cd3ac3aca3f2afe8570708066d64d893da468e8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Jan 2009 14:42:48 -0800 Subject: fs/exec.c: make do_coredump() void No one cares about do_coredump()'s return value, and it does not seem to be necessary either. So make it void. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: WANG Cong Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 0d0150b4901e..77b4a9e46004 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -107,7 +107,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); -extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); +extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); -- cgit v1.2.3 From 991c0e6d1ae3df59f0ddfe05edecec8319e35a1b Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 6 Jan 2009 14:56:21 -0800 Subject: byteorder: only use linux/swab.h The first step to make swab.h a regular header that will include an asm/swab.h with arch overrides. Avoid the gratuitous differences introduced in the new linux/swab.h by naming the ___constant_swabXX bits and __fswabXX bits exactly as found in the old implementation in byteorder/swab[b].h Use this new swab.h in byteorder/[big|little]_endian.h and remove the two old swab headers. Although the inclusion of asm/byteorder.h looks strange in linux/swab.h, this will allow each arch to move the actual arch overrides for the swab bits in an asm file and then the includes can be cleaned up without requiring a flag day for all arches at once. 
Keep providing __fswabXX in case some userspace was using them directly, but the revised __swabXX should be used instead in any new code and will always do constant folding not dependent on the optimization level, which means the __constant versions can be phased out in-kernel. Arches that use the old-style arch macros will lose their optimized versions until they move to the new style, but at least they will still compile. Many arches have already moved and the patches to move the remaining arches are trivial. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/byteorder/Kbuild | 2 - include/linux/byteorder/big_endian.h | 3 +- include/linux/byteorder/little_endian.h | 3 +- include/linux/byteorder/swab.h | 222 -------------------------------- include/linux/byteorder/swabb.h | 135 ------------------- include/linux/swab.h | 50 +++---- 6 files changed, 27 insertions(+), 388 deletions(-) delete mode 100644 include/linux/byteorder/swab.h delete mode 100644 include/linux/byteorder/swabb.h (limited to 'include/linux') diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild index fbaa7f9cee32..38437225b092 100644 --- a/include/linux/byteorder/Kbuild +++ b/include/linux/byteorder/Kbuild @@ -1,4 +1,2 @@ unifdef-y += big_endian.h unifdef-y += little_endian.h -unifdef-y += swab.h -unifdef-y += swabb.h diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 1cba3f3efe5f..3c80fd7e8b56 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -9,8 +9,7 @@ #endif #include -#include -#include +#include #define __constant_htonl(x) ((__force __be32)(__u32)(x)) #define __constant_ntohl(x) ((__force __u32)(__be32)(x)) diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index cedc1b5a289c..83195fb82962 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -9,8 +9,7 @@ #endif 
#include -#include -#include +#include #define __constant_htonl(x) ((__force __be32)___constant_swab32((x))) #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x)) diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h deleted file mode 100644 index 142134ff1645..000000000000 --- a/include/linux/byteorder/swab.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef _LINUX_BYTEORDER_SWAB_H -#define _LINUX_BYTEORDER_SWAB_H - -/* - * linux/byteorder/swab.h - * Byte-swapping, independently from CPU endianness - * swabXX[ps]?(foo) - * - * Francois-Rene Rideau 19971205 - * separated swab functions from cpu_to_XX, - * to clean up support for bizarre-endian architectures. - * - * Trent Piepho 2007114 - * make constant-folding work, provide C versions that - * gcc can optimize better, explain different versions - * - * See asm-i386/byteorder.h and suches for examples of how to provide - * architecture-dependent optimized versions - * - */ - -#include - -/* Functions/macros defined, there are a lot: - * - * ___swabXX - * Generic C versions of the swab functions. - * - * ___constant_swabXX - * C versions that gcc can fold into a compile-time constant when - * the argument is a compile-time constant. - * - * __arch__swabXX[sp]? - * Architecture optimized versions of all the swab functions - * (including the s and p versions). These can be defined in - * asm-arch/byteorder.h. Any which are not, are defined here. - * __arch__swabXXs() is defined in terms of __arch__swabXXp(), which - * is defined in terms of __arch__swabXX(), which is in turn defined - * in terms of ___swabXX(x). - * These must be macros. They may be unsafe for arguments with - * side-effects. - * - * __fswabXX - * Inline function versions of the __arch__ macros. These _are_ safe - * if the arguments have side-effects. Note there are no s and p - * versions of these. - * - * __swabXX[sb] - * There are the ones you should actually use. 
The __swabXX versions - * will be a constant given a constant argument and use the arch - * specific code (if any) for non-constant arguments. The s and p - * versions always use the arch specific code (constant folding - * doesn't apply). They are safe to use with arguments with - * side-effects. - * - * swabXX[sb] - * Nicknames for __swabXX[sb] to use in the kernel. - */ - -/* casts are necessary for constants, because we never know how for sure - * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. - */ - -static __inline__ __attribute_const__ __u16 ___swab16(__u16 x) -{ - return x<<8 | x>>8; -} -static __inline__ __attribute_const__ __u32 ___swab32(__u32 x) -{ - return x<<24 | x>>24 | - (x & (__u32)0x0000ff00UL)<<8 | - (x & (__u32)0x00ff0000UL)>>8; -} -static __inline__ __attribute_const__ __u64 ___swab64(__u64 x) -{ - return x<<56 | x>>56 | - (x & (__u64)0x000000000000ff00ULL)<<40 | - (x & (__u64)0x0000000000ff0000ULL)<<24 | - (x & (__u64)0x00000000ff000000ULL)<< 8 | - (x & (__u64)0x000000ff00000000ULL)>> 8 | - (x & (__u64)0x0000ff0000000000ULL)>>24 | - (x & (__u64)0x00ff000000000000ULL)>>40; -} - -#define ___constant_swab16(x) \ - ((__u16)( \ - (((__u16)(x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(x) & (__u16)0xff00U) >> 8) )) -#define ___constant_swab32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) -#define ___constant_swab64(x) \ - ((__u64)( \ - (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ - (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ - (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \ - (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \ - (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ - (__u64)(((__u64)(x) & 
(__u64)0x00ff000000000000ULL) >> 40) | \ - (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) )) - -/* - * provide defaults when no architecture-specific optimization is detected - */ -#ifndef __arch__swab16 -# define __arch__swab16(x) ___swab16(x) -#endif -#ifndef __arch__swab32 -# define __arch__swab32(x) ___swab32(x) -#endif -#ifndef __arch__swab64 -# define __arch__swab64(x) ___swab64(x) -#endif - -#ifndef __arch__swab16p -# define __arch__swab16p(x) __arch__swab16(*(x)) -#endif -#ifndef __arch__swab32p -# define __arch__swab32p(x) __arch__swab32(*(x)) -#endif -#ifndef __arch__swab64p -# define __arch__swab64p(x) __arch__swab64(*(x)) -#endif - -#ifndef __arch__swab16s -# define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x))) -#endif -#ifndef __arch__swab32s -# define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x))) -#endif -#ifndef __arch__swab64s -# define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x))) -#endif - - -/* - * Allow constant folding - */ -#if defined(__GNUC__) && defined(__OPTIMIZE__) -# define __swab16(x) \ -(__builtin_constant_p((__u16)(x)) ? \ - ___constant_swab16((x)) : \ - __fswab16((x))) -# define __swab32(x) \ -(__builtin_constant_p((__u32)(x)) ? \ - ___constant_swab32((x)) : \ - __fswab32((x))) -# define __swab64(x) \ -(__builtin_constant_p((__u64)(x)) ? 
\ - ___constant_swab64((x)) : \ - __fswab64((x))) -#else -# define __swab16(x) __fswab16(x) -# define __swab32(x) __fswab32(x) -# define __swab64(x) __fswab64(x) -#endif /* OPTIMIZE */ - - -static __inline__ __attribute_const__ __u16 __fswab16(__u16 x) -{ - return __arch__swab16(x); -} -static __inline__ __u16 __swab16p(const __u16 *x) -{ - return __arch__swab16p(x); -} -static __inline__ void __swab16s(__u16 *addr) -{ - __arch__swab16s(addr); -} - -static __inline__ __attribute_const__ __u32 __fswab32(__u32 x) -{ - return __arch__swab32(x); -} -static __inline__ __u32 __swab32p(const __u32 *x) -{ - return __arch__swab32p(x); -} -static __inline__ void __swab32s(__u32 *addr) -{ - __arch__swab32s(addr); -} - -#ifdef __BYTEORDER_HAS_U64__ -static __inline__ __attribute_const__ __u64 __fswab64(__u64 x) -{ -# ifdef __SWAB_64_THRU_32__ - __u32 h = x >> 32; - __u32 l = x & ((1ULL<<32)-1); - return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h))); -# else - return __arch__swab64(x); -# endif -} -static __inline__ __u64 __swab64p(const __u64 *x) -{ - return __arch__swab64p(x); -} -static __inline__ void __swab64s(__u64 *addr) -{ - __arch__swab64s(addr); -} -#endif /* __BYTEORDER_HAS_U64__ */ - -#if defined(__KERNEL__) -#define swab16 __swab16 -#define swab32 __swab32 -#define swab64 __swab64 -#define swab16p __swab16p -#define swab32p __swab32p -#define swab64p __swab64p -#define swab16s __swab16s -#define swab32s __swab32s -#define swab64s __swab64s -#endif - -#endif /* _LINUX_BYTEORDER_SWAB_H */ diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h deleted file mode 100644 index 8c780c7d779e..000000000000 --- a/include/linux/byteorder/swabb.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _LINUX_BYTEORDER_SWABB_H -#define _LINUX_BYTEORDER_SWABB_H - -/* - * linux/byteorder/swabb.h - * SWAp Bytes Bizarrely - * swaHHXX[ps]?(foo) - * - * Support for obNUXIous pdp-endian and other bizarre architectures. - * Will Linux ever run on such ancient beasts? 
if not, this file - * will be but a programming pearl. Still, it's a reminder that we - * shouldn't be making too many assumptions when trying to be portable. - * - */ - -/* - * Meaning of the names I chose (vaxlinux people feel free to correct them): - * swahw32 swap 16-bit half-words in a 32-bit word - * swahb32 swap 8-bit halves of each 16-bit half-word in a 32-bit word - * - * No 64-bit support yet. I don't know NUXI conventions for long longs. - * I guarantee it will be a mess when it's there, though :-> - * It will be even worse if there are conflicting 64-bit conventions. - * Hopefully, no one ever used 64-bit objects on NUXI machines. - * - */ - -#include - -#define ___swahw32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \ - (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \ -}) -#define ___swahb32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \ - (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \ -}) - -#define ___constant_swahw32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ - (((__u32)(x) & (__u32)0xffff0000UL) >> 16) )) -#define ___constant_swahb32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ - (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) )) - -/* - * provide defaults when no architecture-specific optimization is detected - */ -#ifndef __arch__swahw32 -# define __arch__swahw32(x) ___swahw32(x) -#endif -#ifndef __arch__swahb32 -# define __arch__swahb32(x) ___swahb32(x) -#endif - -#ifndef __arch__swahw32p -# define __arch__swahw32p(x) __swahw32(*(x)) -#endif -#ifndef __arch__swahb32p -# define __arch__swahb32p(x) __swahb32(*(x)) -#endif - -#ifndef __arch__swahw32s -# define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0) -#endif -#ifndef __arch__swahb32s -# define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0) -#endif - - -/* - * Allow constant folding - */ -#define __swahw32(x) \ 
-(__builtin_constant_p((__u32)(x)) ? \ - ___swahw32((x)) : \ - __fswahw32((x))) -#define __swahb32(x) \ -(__builtin_constant_p((__u32)(x)) ? \ - ___swahb32((x)) : \ - __fswahb32((x))) - - -static inline __u32 __fswahw32(__u32 x) -{ - return __arch__swahw32(x); -} - -static inline __u32 __swahw32p(__u32 *x) -{ - return __arch__swahw32p(x); -} - -static inline void __swahw32s(__u32 *addr) -{ - __arch__swahw32s(addr); -} - -static inline __u32 __fswahb32(__u32 x) -{ - return __arch__swahb32(x); -} - -static inline __u32 __swahb32p(__u32 *x) -{ - return __arch__swahb32p(x); -} - -static inline void __swahb32s(__u32 *addr) -{ - __arch__swahb32s(addr); -} - -#ifdef __BYTEORDER_HAS_U64__ -/* - * Not supported yet - */ -#endif /* __BYTEORDER_HAS_U64__ */ - -#define swahw32 __swahw32 -#define swahb32 __swahb32 -#define swahw32p __swahw32p -#define swahb32p __swahb32p -#define swahw32s __swahw32s -#define swahb32s __swahb32s - -#endif /* _LINUX_BYTEORDER_SWABB_H */ diff --git a/include/linux/swab.h b/include/linux/swab.h index bbed279f3b32..9a2d33e0a98a 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -9,17 +9,17 @@ * casts are necessary for constants, because we never know how for sure * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. 
*/ -#define __const_swab16(x) ((__u16)( \ +#define ___constant_swab16(x) ((__u16)( \ (((__u16)(x) & (__u16)0x00ffU) << 8) | \ (((__u16)(x) & (__u16)0xff00U) >> 8))) -#define __const_swab32(x) ((__u32)( \ +#define ___constant_swab32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ (((__u32)(x) & (__u32)0xff000000UL) >> 24))) -#define __const_swab64(x) ((__u64)( \ +#define ___constant_swab64(x) ((__u64)( \ (((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ (((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ (((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ @@ -29,11 +29,11 @@ (((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ (((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56))) -#define __const_swahw32(x) ((__u32)( \ +#define ___constant_swahw32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ (((__u32)(x) & (__u32)0xffff0000UL) >> 16))) -#define __const_swahb32(x) ((__u32)( \ +#define ___constant_swahb32(x) ((__u32)( \ (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ (((__u32)(x) & (__u32)0xff00ff00UL) >> 8))) @@ -43,25 +43,25 @@ * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32 */ -static inline __attribute_const__ __u16 ___swab16(__u16 val) +static inline __attribute_const__ __u16 __fswab16(__u16 val) { #ifdef __arch_swab16 return __arch_swab16(val); #else - return __const_swab16(val); + return ___constant_swab16(val); #endif } -static inline __attribute_const__ __u32 ___swab32(__u32 val) +static inline __attribute_const__ __u32 __fswab32(__u32 val) { #ifdef __arch_swab32 return __arch_swab32(val); #else - return __const_swab32(val); + return ___constant_swab32(val); #endif } -static inline __attribute_const__ __u64 ___swab64(__u64 val) +static inline __attribute_const__ __u64 __fswab64(__u64 val) { #ifdef __arch_swab64 return __arch_swab64(val); @@ -70,25 +70,25 @@ static inline __attribute_const__ __u64 ___swab64(__u64 
val) __u32 l = val & ((1ULL << 32) - 1); return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h))); #else - return __const_swab64(val); + return ___constant_swab64(val); #endif } -static inline __attribute_const__ __u32 ___swahw32(__u32 val) +static inline __attribute_const__ __u32 __fswahw32(__u32 val) { #ifdef __arch_swahw32 return __arch_swahw32(val); #else - return __const_swahw32(val); + return ___constant_swahw32(val); #endif } -static inline __attribute_const__ __u32 ___swahb32(__u32 val) +static inline __attribute_const__ __u32 __fswahb32(__u32 val) { #ifdef __arch_swahb32 return __arch_swahb32(val); #else - return __const_swahb32(val); + return ___constant_swahb32(val); #endif } @@ -98,8 +98,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? \ - __const_swab16((x)) : \ - ___swab16((x))) + ___constant_swab16(x) : \ + __fswab16(x)) /** * __swab32 - return a byteswapped 32-bit value @@ -107,8 +107,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab32(x) \ (__builtin_constant_p((__u32)(x)) ? \ - __const_swab32((x)) : \ - ___swab32((x))) + ___constant_swab32(x) : \ + __fswab32(x)) /** * __swab64 - return a byteswapped 64-bit value @@ -116,8 +116,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swab64(x) \ (__builtin_constant_p((__u64)(x)) ? \ - __const_swab64((x)) : \ - ___swab64((x))) + ___constant_swab64(x) : \ + __fswab64(x)) /** * __swahw32 - return a word-swapped 32-bit value @@ -127,8 +127,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swahw32(x) \ (__builtin_constant_p((__u32)(x)) ? 
\ - __const_swahw32((x)) : \ - ___swahw32((x))) + ___constant_swahw32(x) : \ + __fswahw32(x)) /** * __swahb32 - return a high and low byte-swapped 32-bit value @@ -138,8 +138,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) */ #define __swahb32(x) \ (__builtin_constant_p((__u32)(x)) ? \ - __const_swahb32((x)) : \ - ___swahb32((x))) + ___constant_swahb32(x) : \ + __fswahb32(x)) /** * __swab16p - return a byteswapped 16-bit value from a pointer -- cgit v1.2.3 From 637b180c23313f2964e0ef20f1ee375203866968 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 6 Jan 2009 13:30:58 -0800 Subject: byteorder: remove the now unused byteorder.h This implementation caused problems in userspace which can, and does define _both_ __LITTLE_ENDIAN and __BIG_ENDIAN. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - include/linux/byteorder.h | 372 ---------------------------------------------- 2 files changed, 373 deletions(-) delete mode 100644 include/linux/byteorder.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 39da666067b9..a3323f337e4d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -179,7 +179,6 @@ unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h unifdef-y += blktrace_api.h -unifdef-y += byteorder.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h diff --git a/include/linux/byteorder.h b/include/linux/byteorder.h deleted file mode 100644 index 29f002d73d98..000000000000 --- a/include/linux/byteorder.h +++ /dev/null @@ -1,372 +0,0 @@ -#ifndef _LINUX_BYTEORDER_H -#define _LINUX_BYTEORDER_H - -#include -#include - -#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) -# error Fix asm/byteorder.h to define one endianness -#endif - -#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN) -# error Fix asm/byteorder.h to define arch endianness -#endif - -#ifdef __LITTLE_ENDIAN -# undef __LITTLE_ENDIAN -# define 
__LITTLE_ENDIAN 1234 -#endif - -#ifdef __BIG_ENDIAN -# undef __BIG_ENDIAN -# define __BIG_ENDIAN 4321 -#endif - -#if defined(__LITTLE_ENDIAN) && !defined(__LITTLE_ENDIAN_BITFIELD) -# define __LITTLE_ENDIAN_BITFIELD -#endif - -#if defined(__BIG_ENDIAN) && !defined(__BIG_ENDIAN_BITFIELD) -# define __BIG_ENDIAN_BITFIELD -#endif - -#ifdef __LITTLE_ENDIAN -# define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) -# define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) -# define __le64_to_cpu(x) ((__force __u64)(__le64)(x)) -# define __cpu_to_le16(x) ((__force __le16)(__u16)(x)) -# define __cpu_to_le32(x) ((__force __le32)(__u32)(x)) -# define __cpu_to_le64(x) ((__force __le64)(__u64)(x)) - -# define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x)) -# define __be32_to_cpu(x) __swab32((__force __u32)(__be32)(x)) -# define __be64_to_cpu(x) __swab64((__force __u64)(__be64)(x)) -# define __cpu_to_be16(x) ((__force __be16)__swab16(x)) -# define __cpu_to_be32(x) ((__force __be32)__swab32(x)) -# define __cpu_to_be64(x) ((__force __be64)__swab64(x)) -#endif - -#ifdef __BIG_ENDIAN -# define __be16_to_cpu(x) ((__force __u16)(__be16)(x)) -# define __be32_to_cpu(x) ((__force __u32)(__be32)(x)) -# define __be64_to_cpu(x) ((__force __u64)(__be64)(x)) -# define __cpu_to_be16(x) ((__force __be16)(__u16)(x)) -# define __cpu_to_be32(x) ((__force __be32)(__u32)(x)) -# define __cpu_to_be64(x) ((__force __be64)(__u64)(x)) - -# define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) -# define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x)) -# define __le64_to_cpu(x) __swab64((__force __u64)(__le64)(x)) -# define __cpu_to_le16(x) ((__force __le16)__swab16(x)) -# define __cpu_to_le32(x) ((__force __le32)__swab32(x)) -# define __cpu_to_le64(x) ((__force __le64)__swab64(x)) -#endif - -/* - * These helpers could be phased out over time as the base version - * handles constant folding. 
- */ -#define __constant_htonl(x) __cpu_to_be32(x) -#define __constant_ntohl(x) __be32_to_cpu(x) -#define __constant_htons(x) __cpu_to_be16(x) -#define __constant_ntohs(x) __be16_to_cpu(x) - -#define __constant_le16_to_cpu(x) __le16_to_cpu(x) -#define __constant_le32_to_cpu(x) __le32_to_cpu(x) -#define __constant_le64_to_cpu(x) __le64_to_cpu(x) -#define __constant_be16_to_cpu(x) __be16_to_cpu(x) -#define __constant_be32_to_cpu(x) __be32_to_cpu(x) -#define __constant_be64_to_cpu(x) __be64_to_cpu(x) - -#define __constant_cpu_to_le16(x) __cpu_to_le16(x) -#define __constant_cpu_to_le32(x) __cpu_to_le32(x) -#define __constant_cpu_to_le64(x) __cpu_to_le64(x) -#define __constant_cpu_to_be16(x) __cpu_to_be16(x) -#define __constant_cpu_to_be32(x) __cpu_to_be32(x) -#define __constant_cpu_to_be64(x) __cpu_to_be64(x) - -static inline void __le16_to_cpus(__u16 *p) -{ -#ifdef __BIG_ENDIAN - __swab16s(p); -#endif -} - -static inline void __cpu_to_le16s(__u16 *p) -{ -#ifdef __BIG_ENDIAN - __swab16s(p); -#endif -} - -static inline void __le32_to_cpus(__u32 *p) -{ -#ifdef __BIG_ENDIAN - __swab32s(p); -#endif -} - -static inline void __cpu_to_le32s(__u32 *p) -{ -#ifdef __BIG_ENDIAN - __swab32s(p); -#endif -} - -static inline void __le64_to_cpus(__u64 *p) -{ -#ifdef __BIG_ENDIAN - __swab64s(p); -#endif -} - -static inline void __cpu_to_le64s(__u64 *p) -{ -#ifdef __BIG_ENDIAN - __swab64s(p); -#endif -} - -static inline void __be16_to_cpus(__u16 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab16s(p); -#endif -} - -static inline void __cpu_to_be16s(__u16 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab16s(p); -#endif -} - -static inline void __be32_to_cpus(__u32 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab32s(p); -#endif -} - -static inline void __cpu_to_be32s(__u32 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab32s(p); -#endif -} - -static inline void __be64_to_cpus(__u64 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab64s(p); -#endif -} - -static inline void __cpu_to_be64s(__u64 *p) -{ -#ifdef __LITTLE_ENDIAN - __swab64s(p); 
-#endif -} - -static inline __u16 __le16_to_cpup(const __le16 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u16)*p; -#else - return __swab16p((__force __u16 *)p); -#endif -} - -static inline __u32 __le32_to_cpup(const __le32 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u32)*p; -#else - return __swab32p((__force __u32 *)p); -#endif -} - -static inline __u64 __le64_to_cpup(const __le64 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __u64)*p; -#else - return __swab64p((__force __u64 *)p); -#endif -} - -static inline __le16 __cpu_to_le16p(const __u16 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le16)*p; -#else - return (__force __le16)__swab16p(p); -#endif -} - -static inline __le32 __cpu_to_le32p(const __u32 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le32)*p; -#else - return (__force __le32)__swab32p(p); -#endif -} - -static inline __le64 __cpu_to_le64p(const __u64 *p) -{ -#ifdef __LITTLE_ENDIAN - return (__force __le64)*p; -#else - return (__force __le64)__swab64p(p); -#endif -} - -static inline __u16 __be16_to_cpup(const __be16 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u16)*p; -#else - return __swab16p((__force __u16 *)p); -#endif -} - -static inline __u32 __be32_to_cpup(const __be32 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u32)*p; -#else - return __swab32p((__force __u32 *)p); -#endif -} - -static inline __u64 __be64_to_cpup(const __be64 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __u64)*p; -#else - return __swab64p((__force __u64 *)p); -#endif -} - -static inline __be16 __cpu_to_be16p(const __u16 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be16)*p; -#else - return (__force __be16)__swab16p(p); -#endif -} - -static inline __be32 __cpu_to_be32p(const __u32 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)*p; -#else - return (__force __be32)__swab32p(p); -#endif -} - -static inline __be64 __cpu_to_be64p(const __u64 *p) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)*p; -#else - return (__force __be64)__swab64p(p); 
-#endif -} - -#ifdef __KERNEL__ - -# define le16_to_cpu __le16_to_cpu -# define le32_to_cpu __le32_to_cpu -# define le64_to_cpu __le64_to_cpu -# define be16_to_cpu __be16_to_cpu -# define be32_to_cpu __be32_to_cpu -# define be64_to_cpu __be64_to_cpu -# define cpu_to_le16 __cpu_to_le16 -# define cpu_to_le32 __cpu_to_le32 -# define cpu_to_le64 __cpu_to_le64 -# define cpu_to_be16 __cpu_to_be16 -# define cpu_to_be32 __cpu_to_be32 -# define cpu_to_be64 __cpu_to_be64 - -# define le16_to_cpup __le16_to_cpup -# define le32_to_cpup __le32_to_cpup -# define le64_to_cpup __le64_to_cpup -# define be16_to_cpup __be16_to_cpup -# define be32_to_cpup __be32_to_cpup -# define be64_to_cpup __be64_to_cpup -# define cpu_to_le16p __cpu_to_le16p -# define cpu_to_le32p __cpu_to_le32p -# define cpu_to_le64p __cpu_to_le64p -# define cpu_to_be16p __cpu_to_be16p -# define cpu_to_be32p __cpu_to_be32p -# define cpu_to_be64p __cpu_to_be64p - -# define le16_to_cpus __le16_to_cpus -# define le32_to_cpus __le32_to_cpus -# define le64_to_cpus __le64_to_cpus -# define be16_to_cpus __be16_to_cpus -# define be32_to_cpus __be32_to_cpus -# define be64_to_cpus __be64_to_cpus -# define cpu_to_le16s __cpu_to_le16s -# define cpu_to_le32s __cpu_to_le32s -# define cpu_to_le64s __cpu_to_le64s -# define cpu_to_be16s __cpu_to_be16s -# define cpu_to_be32s __cpu_to_be32s -# define cpu_to_be64s __cpu_to_be64s - -/* - * They have to be macros in order to do the constant folding - * correctly - if the argument passed into a inline function - * it is no longer constant according to gcc.. 
- */ -# undef ntohl -# undef ntohs -# undef htonl -# undef htons - -# define ___htonl(x) __cpu_to_be32(x) -# define ___htons(x) __cpu_to_be16(x) -# define ___ntohl(x) __be32_to_cpu(x) -# define ___ntohs(x) __be16_to_cpu(x) - -# define htonl(x) ___htonl(x) -# define ntohl(x) ___ntohl(x) -# define htons(x) ___htons(x) -# define ntohs(x) ___ntohs(x) - -static inline void le16_add_cpu(__le16 *var, u16 val) -{ - *var = cpu_to_le16(le16_to_cpup(var) + val); -} - -static inline void le32_add_cpu(__le32 *var, u32 val) -{ - *var = cpu_to_le32(le32_to_cpup(var) + val); -} - -static inline void le64_add_cpu(__le64 *var, u64 val) -{ - *var = cpu_to_le64(le64_to_cpup(var) + val); -} - -static inline void be16_add_cpu(__be16 *var, u16 val) -{ - *var = cpu_to_be16(be16_to_cpup(var) + val); -} - -static inline void be32_add_cpu(__be32 *var, u32 val) -{ - *var = cpu_to_be32(be32_to_cpup(var) + val); -} - -static inline void be64_add_cpu(__be64 *var, u64 val) -{ - *var = cpu_to_be64(be64_to_cpup(var) + val); -} - -#endif /* __KERNEL__ */ -#endif /* _LINUX_BYTEORDER_H */ -- cgit v1.2.3 From ede6f5aea054d3fb67c78857f7abdee602302043 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Jan 2009 21:17:57 -0800 Subject: Fix up 64-bit byte swaps for most 32-bit architectures The __SWAB_64_THRU_32__ case of a 64-bit byte swap was depending on the no-longer-existent ___swab32() method (three underscores). We got rid of some of the worst indirection and complexity, and now it should just use the 32-bit swab function that was defined right above it.
Reported-and-tested-by: Nicolas Pitre Reported-by: Benjamin Herrenschmidt Cc: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swab.h b/include/linux/swab.h index 9a2d33e0a98a..be5284d4a053 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -68,7 +68,7 @@ static inline __attribute_const__ __u64 __fswab64(__u64 val) #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; __u32 l = val & ((1ULL << 32) - 1); - return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h))); + return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h))); #else return ___constant_swab64(val); #endif -- cgit v1.2.3 From e1995f65be0786ca201f466f049dad1e2e4c3421 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:16 +0100 Subject: i2c: Drop I2C_CLASS_ALL I2C_CLASS_ALL is almost never what bus driver authors really want. These i2c classes are really only about which devices must be probed, not what devices can be present. As device drivers get converted to the new i2c device driver model, only a few device types will keep relying on probing. 
Signed-off-by: Jean Delvare Acked-by: Sonic Zhang --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 33a5992d4936..0184de4050b8 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -397,7 +397,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ -#define I2C_CLASS_ALL (UINT_MAX) /* all of the above */ /* i2c_client_address_data is the struct for holding default client * addresses for a driver and for the parameters supplied on the -- cgit v1.2.3 From 994a075f0f2e8cdb919d8e495f98211651e3c461 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:17 +0100 Subject: i2c: Drop I2C_CLASS_CAM_ANALOG and I2C_CLASS_SOUND There are no users left of these two i2c probe class flags so we can drop them now.
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0184de4050b8..4fd79d2e4021 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -393,9 +393,7 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ -#define I2C_CLASS_SOUND (1<<6) /* sound devices */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.3 From b305271861219f0ce162eb565f0f28f4c781299d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Jan 2009 14:29:17 +0100 Subject: i2c: Drop I2C_CLASS_CAM_DIGITAL There are a number of drivers which set their i2c bus class to I2C_CLASS_CAM_DIGITAL, however no chip driver actually checks for this flag, so we might as well drop it now. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4fd79d2e4021..20873d402467 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -393,7 +393,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.3 From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 08:45:46 -0800 Subject: async: Asynchronous function calls to speed up kernel boot Right now, most of the kernel boot is strictly synchronous, such that various hardware delays are done sequentially. In order to make the kernel boot faster, this patch introduces infrastructure to allow doing some of the initialization steps asynchronously, which will hide significant portions of the hardware delays in practice. In order to not change device order and other similar observables, this patch does NOT do full parallel initialization. Rather, it operates more in the way an out of order CPU does; the work may be done out of order and asynchronous, but the observable effects (instruction retiring for the CPU) are still done in the original sequence. 
Signed-off-by: Arjan van de Ven --- include/linux/async.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/async.h (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h new file mode 100644 index 000000000000..c4ecacd0b327 --- /dev/null +++ b/include/linux/async.h @@ -0,0 +1,25 @@ +/* + * async.h: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include + +typedef u64 async_cookie_t; +typedef void (async_func_ptr) (void *data, async_cookie_t cookie); + +extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); +extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern void async_synchronize_full(void); +extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_cookie(async_cookie_t cookie); +extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); + -- cgit v1.2.3 From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Jan 2009 07:20:54 -0800 Subject: async: make the final inode deletion an asynchronous event this makes "rm -rf" on a (names cached) kernel tree go from 11.6 to 8.6 seconds on an ext3 filesystem Signed-off-by: Arjan van de Ven --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7eba77f666e..e38a64d71eff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1184,6 +1184,11 @@ struct super_block { * generic_show_options() */ char *s_options; + + /* + * storage for 
asynchronous operations + */ + struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3 From b92a78e582b1a45649143dc86e526f5824092478 Mon Sep 17 00:00:00 2001 From: Rodolfo Giometti Date: Thu, 23 Oct 2008 10:08:07 +0200 Subject: usb host: Oxford OXU210HP HCD driver. This driver implements the support for Oxford OXU210HP USB high-speed host, no peripheral nor OTG. Signed-off-by: Rodolfo Giometti Cc: Kan Liu Signed-off-by: Greg Kroah-Hartman --- include/linux/oxu210hp.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/oxu210hp.h (limited to 'include/linux') diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h new file mode 100644 index 000000000000..0bf96eae5389 --- /dev/null +++ b/include/linux/oxu210hp.h @@ -0,0 +1,7 @@ +/* platform data for the OXU210HP HCD */ + +struct oxu210hp_platform_data { + unsigned int bus16:1; + unsigned int use_hcd_otg:1; + unsigned int use_hcd_sph:1; +}; -- cgit v1.2.3 From d767d888750a8e15656b7ee15d68f90a151b8936 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 6 Nov 2008 22:32:15 -0800 Subject: USB: wusb: annotate association types with proper endianness Also a trivial annotation in rh.c for: drivers/usb/wusbcore/rh.c:366:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:366:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:366:9: got restricted __le16 [usertype] drivers/usb/wusbcore/rh.c:367:9: warning: incorrect type in assignment (different base types) drivers/usb/wusbcore/rh.c:367:9: expected unsigned short [unsigned] [short] [usertype] drivers/usb/wusbcore/rh.c:367:9: got restricted __le16 [usertype] Association types annotation fixes piles of warnings similar to: drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] id
drivers/usb/wusbcore/cbaf.c:238:30: got int drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types) drivers/usb/wusbcore/cbaf.c:238:30: expected restricted __le16 [usertype] len drivers/usb/wusbcore/cbaf.c:238:30: got int Signed-off-by: Harvey Harrison Cc: David Vrabel Cc: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/association.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h index 07c5e3cf5898..0a4a18b3c1bb 100644 --- a/include/linux/usb/association.h +++ b/include/linux/usb/association.h @@ -28,17 +28,17 @@ struct wusb_am_attr { }; /* Different fields defined by the spec */ -#define WUSB_AR_AssociationTypeId { .id = 0x0000, .len = 2 } -#define WUSB_AR_AssociationSubTypeId { .id = 0x0001, .len = 2 } -#define WUSB_AR_Length { .id = 0x0002, .len = 4 } -#define WUSB_AR_AssociationStatus { .id = 0x0004, .len = 4 } -#define WUSB_AR_LangID { .id = 0x0008, .len = 2 } -#define WUSB_AR_DeviceFriendlyName { .id = 0x000b, .len = 64 } /* max */ -#define WUSB_AR_HostFriendlyName { .id = 0x000c, .len = 64 } /* max */ -#define WUSB_AR_CHID { .id = 0x1000, .len = 16 } -#define WUSB_AR_CDID { .id = 0x1001, .len = 16 } -#define WUSB_AR_ConnectionContext { .id = 0x1002, .len = 48 } -#define WUSB_AR_BandGroups { .id = 0x1004, .len = 2 } +#define WUSB_AR_AssociationTypeId { .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) } +#define WUSB_AR_AssociationSubTypeId { .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) } +#define WUSB_AR_Length { .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) } +#define WUSB_AR_AssociationStatus { .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) } +#define WUSB_AR_LangID { .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) } +#define WUSB_AR_DeviceFriendlyName { .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */ +#define 
WUSB_AR_HostFriendlyName { .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */ +#define WUSB_AR_CHID { .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) } +#define WUSB_AR_CDID { .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) } +#define WUSB_AR_ConnectionContext { .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) } +#define WUSB_AR_BandGroups { .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) } /* CBAF Control Requests (AMS1.0[T4-1] */ enum { -- cgit v1.2.3 From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 12 Nov 2008 16:19:49 -0500 Subject: USB: add asynchronous autosuspend/autoresume support This patch (as1160b) adds support routines for asynchronous autosuspend and autoresume, with accompanying documentation updates. There already are several potential users of this interface, and others are likely to arise as autosuspend support becomes more widespread. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index f72aa51f7bcd..859a88e6ce9c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -398,6 +398,7 @@ struct usb_tt; * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended * @autosuspend: for delayed autosuspends + * @autoresume: for autoresumes requested while in_interrupt * @pm_mutex: protects PM operations * @last_busy: time of last use * @autosuspend_delay: in jiffies @@ -476,6 +477,7 @@ struct usb_device { #ifdef CONFIG_PM struct delayed_work autosuspend; + struct work_struct autoresume; struct mutex pm_mutex; unsigned long last_busy; @@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); extern int usb_autopm_set_interface(struct usb_interface *intf); extern int usb_autopm_get_interface(struct usb_interface *intf); extern void 
usb_autopm_put_interface(struct usb_interface *intf); +extern int usb_autopm_get_interface_async(struct usb_interface *intf); +extern void usb_autopm_put_interface_async(struct usb_interface *intf); static inline void usb_autopm_enable(struct usb_interface *intf) { @@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf) static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } +static inline int usb_autopm_get_interface_async(struct usb_interface *intf) +{ return 0; } + static inline void usb_autopm_put_interface(struct usb_interface *intf) { } +static inline void usb_autopm_put_interface_async(struct usb_interface *intf) +{ } static inline void usb_autopm_enable(struct usb_interface *intf) { } static inline void usb_autopm_disable(struct usb_interface *intf) -- cgit v1.2.3 From dc023dceec861c60bc1d1a17a2c6496ddac26ee7 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 13 Nov 2008 10:31:35 -0800 Subject: USB: Introduce usb_queue_reset() to do resets from atomic contexts This patch introduces a new call to be able to do a USB reset from an atomic context. This is quite helpful in USB callbacks to handle errors (when the only thing that can be done is to do a device reset). It is done by queuing a work struct that will do the actual reset. The struct is "attached" to an interface so pending requests from an interface are removed when said interface is unbound from the driver. The call flow then becomes: usb_queue_reset_device() __usb_queue_reset_device() [workqueue] usb_reset_device() usb_probe_interface() usb_cancel_queue_reset() [error path] usb_unbind_interface() usb_cancel_queue_reset() usb_driver_release_interface() usb_cancel_queue_reset() Note usb_cancel_queue_reset() needs smarts to try not to unqueue when it is actually being executed.
This happens when we run the reset from the workqueue: usb_reset_device() is called and on interface unbind time, usb_cancel_queue_reset() would be called. That would deadlock on cancel_work_sync(). To avoid that, we set (before running usb_reset_device()) usb_intf->reset_running and clear it immediately after returning. Patch is against 2.6.28-rc2 and depends on http://marc.info/?l=linux-usb&m=122581634925308&w=2 (as submitted by Alan Stern). Signed-off-by: Inaky Perez-Gonzalez Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 859a88e6ce9c..c8e55aa979de 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -120,6 +120,11 @@ enum usb_interface_condition { * to the sysfs representation for that device. * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not * allowed unless the counter is 0. + * @reset_ws: Used for scheduling resets from atomic context. + * @reset_running: set to 1 if the interface is currently running a + * queued reset so that usb_cancel_queued_reset() doesn't try to + * remove from the workqueue when running inside the worker + * thread. See __usb_queue_reset_device(). * * USB device drivers attach to interfaces on a physical device.
Each * interface encapsulates a single high level function, such as feeding @@ -168,10 +173,12 @@ struct usb_interface { unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ + unsigned reset_running:1; struct device dev; /* interface specific device info */ struct device *usb_dev; int pm_usage_cnt; /* usage counter for autosuspend */ + struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) #define interface_to_usbdev(intf) \ @@ -507,6 +514,7 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); +extern void usb_queue_reset_device(struct usb_interface *dev); extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); -- cgit v1.2.3 From f150fa1afbf69a87f54752579ff2bb769aad88b3 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Thu, 13 Nov 2008 21:31:21 -0700 Subject: USB: Allow usbmon as a module even if usbcore is builtin usbmon can only be built as a module if usbcore is a module too. Trivial changes to the relevant Kconfig and Makefile (and a few trivial changes elsewhere) allow usbmon to be built as a module even if usbcore is builtin. This is verified to work in all 9 permutations (3 correctly prohibited by Kconfig, 6 build a suitable result). 
Signed-off-by: Paul Bolle Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index c8e55aa979de..8bc81bffc195 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -336,7 +336,7 @@ struct usb_bus { #endif struct device *dev; /* device for this bus */ -#if defined(CONFIG_USB_MON) +#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ #endif -- cgit v1.2.3 From 1537e0ad944acf3a4c2b311a646d7993b89499f7 Mon Sep 17 00:00:00 2001 From: Ben Efros Date: Tue, 18 Nov 2008 13:31:13 -0800 Subject: USB: storage devices and SAT Add the SANE SENSE flag to indicate that a device is capable of handling more than 18-bytes of sense data. This functionality is required for USB-ATA bridges implementing SAT. A future patch will actually enable this function for several devices. 
The logic behind this is that we can detect support for SANE_SENSE in a few ways: 1) ATA PASS THROUGH (12) or (16) execute successfully 2) SPC-3 or higher is in use 3) A previous CHECK CONDITION occurred with sense format 70-73 and had a length greater than 18-bytes total Signed-off-by: Ben Efros Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index d9a3bbe38e6b..998e5cbbf29e 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -52,8 +52,9 @@ US_FLAG(MAX_SECTORS_MIN,0x00002000) \ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ - /* Ignore tag mismatch in bulk operations */ - + /* Ignore tag mismatch in bulk operations */ \ + US_FLAG(SANE_SENSE, 0x00008000) + /* Sane Sense (> 18 bytes) */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From 6084f1bf0c51a99cbba612ee90a4607cffb8b042 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:00:01 -0800 Subject: USB: otg: gpio_vbus transceiver stub gpio_vbus provides simple GPIO VBUS sensing for peripheral controllers with an internal transceiver. Optionally, a second GPIO can be used to control D+ pullup. It also interfaces with the regulator framework to limit charging currents when powered via USB. gpio_vbus requests the regulator supplying "vbus_draw" and can enable/disable it or limit its current depending on USB state. 
[dbrownell@users.sourceforge.net: use drivers/otg, cleanups ] Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gpio_vbus.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/usb/gpio_vbus.h (limited to 'include/linux') diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h new file mode 100644 index 000000000000..d9f03ccc2d60 --- /dev/null +++ b/include/linux/usb/gpio_vbus.h @@ -0,0 +1,30 @@ +/* + * A simple GPIO VBUS sensing driver for B peripheral only devices + * with internal transceivers. + * Optionally D+ pullup can be controlled by a second GPIO. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * struct gpio_vbus_mach_info - configuration for gpio_vbus + * @gpio_vbus: VBUS sensing GPIO + * @gpio_pullup: optional D+ or D- pullup GPIO (else negative/invalid) + * @gpio_vbus_inverted: true if gpio_vbus is active low + * @gpio_pullup_inverted: true if gpio_pullup is active low + * + * The VBUS sensing GPIO should have a pulldown, which will normally be + * part of a resistor ladder turning a 4.0V-5.25V level on VBUS into a + * value the GPIO detects as active. Some systems will use comparators. + */ +struct gpio_vbus_mach_info { + int gpio_vbus; + int gpio_pullup; + bool gpio_vbus_inverted; + bool gpio_pullup_inverted; +}; -- cgit v1.2.3 From 68144e0cc92125f41157ede7b060f83367bc4fe7 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 24 Nov 2008 12:01:17 -0800 Subject: USB: otg: add otg_put_transceiver() As Russell King points out, calling put_device(otg_transceiver->dev) directly in driver cleanup paths makes assumptions about otg_transceiver internals. 
Signed-off-by: Philipp Zabel Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 1db25d152ad8..94df4fe6c6c0 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -84,6 +84,7 @@ extern int otg_set_transceiver(struct otg_transceiver *); /* for usb host and peripheral controller drivers */ extern struct otg_transceiver *otg_get_transceiver(void); +extern void otg_put_transceiver(struct otg_transceiver *); static inline int otg_start_hnp(struct otg_transceiver *otg) -- cgit v1.2.3 From 65bfd2967c906ca322a4bb69a285fe0de8916ac6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 25 Nov 2008 16:39:18 -0500 Subject: USB: Enhance usage of pm_message_t This patch (as1177) modifies the USB core suspend and resume routines. The resume functions now will take a pm_message_t argument, so they will know what sort of resume is occurring. The new argument is also passed to the port suspend/resume and bus suspend/resume routines (although they don't use it for anything but debugging). In addition, special pm_message_t values are used for user-initiated, device-initiated (i.e., remote wakeup), and automatic suspend/resume. By testing these values, drivers can tell whether or not a particular suspend was an autosuspend. Unfortunately, they can't do the same for resumes -- not until the pm_message_t argument is also passed to the drivers' resume methods. That will require a bigger change. IMO, the whole Power Management framework should have been set up this way in the first place. 
Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 8bc81bffc195..74d0b9990c73 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1067,7 +1067,7 @@ struct usb_device_driver { void (*disconnect) (struct usb_device *udev); int (*suspend) (struct usb_device *udev, pm_message_t message); - int (*resume) (struct usb_device *udev); + int (*resume) (struct usb_device *udev, pm_message_t message); struct usbdrv_wrap drvwrap; unsigned int supports_autosuspend:1; }; -- cgit v1.2.3 From 2ffcdb3bdadaf8260986e96384df26c94a6ad42c Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Tue, 2 Dec 2008 21:33:43 +0200 Subject: USB: musb: use new platform data interface of musb to replace old one Signed-off-by: Bryan Wu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 630962c04ca4..d6aad0ea6033 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -47,6 +47,11 @@ struct musb_hdrc_config { u8 ram_bits; /* ram address size */ struct musb_hdrc_eps_bits *eps_bits; +#ifdef CONFIG_BLACKFIN + /* A GPIO controlling VRSEL in Blackfin */ + unsigned int gpio_vrsel; +#endif + }; struct musb_hdrc_platform_data { -- cgit v1.2.3 From 3b23dd6f8a718e5339de4f7d86ce76a078b5f771 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 5 Dec 2008 14:10:34 -0500 Subject: USB: utilize the bus notifiers This patch (as1185) makes usbcore take advantage of the bus notifications sent out by the driver core. Now we can create all our device and interface attribute files before the device or interface uevent is broadcast. 
A side effect is that we no longer create the endpoint "pseudo" devices at the same time as a device or interface is registered -- it seems like a bad idea to try registering an endpoint before the registration of its parent is complete. So the routines for creating and removing endpoint devices have been split out and renamed, and they are called explicitly when needed. A new bitflag is used for keeping track of whether or not the interface's endpoint devices have been created, since (just as with the interface attributes) they vary with the altsetting and hence can be changed at random times. Signed-off-by: Alan Stern Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 74d0b9990c73..e9d63562325a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -108,6 +108,7 @@ enum usb_interface_condition { * (in probe()), bound to a driver, or unbinding (in disconnect()) * @is_active: flag set when the interface is bound and not suspended. * @sysfs_files_created: sysfs attributes exist + * @ep_devs_created: endpoint child pseudo-devices exist * @unregistering: flag set when the interface is being unregistered * @needs_remote_wakeup: flag set when the driver requires remote-wakeup * capability during autosuspend. 
@@ -169,6 +170,7 @@ struct usb_interface { enum usb_interface_condition condition; /* state of binding */ unsigned is_active:1; /* the interface is not suspended */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ + unsigned ep_devs_created:1; /* endpoint "devices" exist */ unsigned unregistering:1; /* unregistration is in progress */ unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ -- cgit v1.2.3 From 49367d8f1d9f26482cf7089489e90f0afd0a942c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2008 21:38:45 +0800 Subject: USB: mark "reject" field of struct urb as atomic_t It is enough to protect accesses to reject field of urb by marking it as atomic_t,also it is the only reason of existence of usb_reject_lock,so remove the lock to make code more clean. Signed-off-by: Ming Lei Acked-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e9d63562325a..4e8654a18250 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1340,7 +1340,7 @@ struct urb { struct kref kref; /* reference count of the URB */ void *hcpriv; /* private data for host controller */ atomic_t use_count; /* concurrent submissions counter */ - u8 reject; /* submissions will fail */ + atomic_t reject; /* submissions will fail */ int unlinked; /* unlink error code */ /* public: documented fields in the urb that can be used by drivers */ -- cgit v1.2.3 From 856395d6e137b4e7194972cb7765f3de6a72ba61 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Dec 2008 09:17:49 +0100 Subject: USB: extension of anchor API to unpoison an anchor This extension allows unpoisoning an anchor allowing drivers that resubmit URBs to reuse an anchor for methods like resume() Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- 
include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 4e8654a18250..e89639896508 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1485,6 +1485,7 @@ extern void usb_poison_urb(struct urb *urb); extern void usb_unpoison_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); +extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); -- cgit v1.2.3 From 25ff1c316f6a763f1eefe7f8984b2d8c03888432 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Dec 2008 12:43:41 -0500 Subject: USB: storage: add last-sector hacks This patch (as1189b) adds some hacks to usb-storage for dealing with the growing problems involving bad capacity values and last-sector accesses: A new flag, US_FL_CAPACITY_OK, is created to indicate that the device is known to report its capacity correctly. An unusual_devs entry for Linux's own File-backed Storage Gadget is added with this flag set, since g_file_storage always reports the correct capacity and since the capacity need not be even (it is determined by the size of the backing file). An entry in unusual_devs.h which has only the CAPACITY_OK flag set shouldn't prejudice libusual, since the device will work perfectly well with either usb-storage or ub. So a new macro, COMPLIANT_DEV, is added to let libusual know about these entries. When a last-sector access succeeds and the total number of sectors is odd (the unexpected case, in which guessing that the number is even might cause trouble), a WARN is triggered. 
The kerneloops.org project will collect these warnings, allowing us to add CAPACITY_OK flags for the devices in question before implementing the default-to-even heuristic. If users want to prevent the stack dump produced by the WARN, they can disable the hack by adding an unusual_devs entry for their device with the CAPACITY_OK flag. When a last-sector access fails three times in a row and neither the FIX_CAPACITY nor the CAPACITY_OK flag is set, we assume the last-sector bug is present. We replace the existing status and sense data with values that will cause the SCSI core to fail the access immediately rather than retry indefinitely. This should fix the difficulties people have been having with Nokia phones. Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 998e5cbbf29e..1eea1ab68dc4 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -53,8 +53,10 @@ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ /* Ignore tag mismatch in bulk operations */ \ - US_FLAG(SANE_SENSE, 0x00008000) - /* Sane Sense (> 18 bytes) */ + US_FLAG(SANE_SENSE, 0x00008000) \ + /* Sane Sense (> 18 bytes) */ \ + US_FLAG(CAPACITY_OK, 0x00010000) \ + /* READ CAPACITY response is correct */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From 338b67b0c1a97ca705023a8189cf41aa0828d294 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Aug 2008 09:37:34 -0700 Subject: USB: remove warn() macro from usb.h USB should not be having its own printk macros, so remove warn() and use the system-wide standard of dev_warn() wherever possible. In the few places that will not work out, use a basic printk(). Now that all in-tree users are gone, remove the macro.
Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e89639896508..28f68f587793 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1744,8 +1744,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); format "\n" , ## arg) #define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 34c65d82e02147331701c7795e3144d511adf4e9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Aug 2008 13:21:04 -0700 Subject: USB: remove info() macro from usb.h USB should not be having its own printk macros, so remove info() and use the system-wide standard of dev_info() wherever possible. No one in the tree is using the macro, so it can now be removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 28f68f587793..85ee9be9361e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1742,8 +1742,6 @@ extern void usb_unregister_notify(struct notifier_block *nb); #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \ format "\n" , ## arg) -#define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \ - format "\n" , ## arg) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 5e07878787ad07361571150230cc3a8d522ae046 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:39 -0800 Subject: debugfs: add helpers for exporting a size_t simple value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the same spirit as debugfs_create_*(), introduce helpers for exporting size_t values over debugfs.
The only trick done is that the format verifier is kept at %llu instead of %zu; otherwise type warnings would pop up: format ‘%zu’ expects type ‘size_t’, but argument 2 has type ‘long long unsigned int’ There is no real way to fix this one--however, we can consider %llu and %zu to be compatible if we consider that we are using the same for validating in debugfs_create_{x,u}{8,16,32}(). Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e1a6c046cea3..23936b16426b 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value); +struct dentry *debugfs_create_size_t(const char *name, mode_t mode, + struct dentry *parent, size_t *value); struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *parent, u32 *value); -- cgit v1.2.3 From ace22f0881e1333d0c55ddf484e5352fe03a806a Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:33 -0800 Subject: wimax: headers for kernel API and user space interaction Definitions for the user/kernel API protocol through generic netlink. User space can copy it verbatim and use it. Kernel API definition declares the main data types and calls for the drivers to integrate into the WiMAX stack. Provides usage documentation. 
Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax.h | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 include/linux/wimax.h (limited to 'include/linux') diff --git a/include/linux/wimax.h b/include/linux/wimax.h new file mode 100644 index 000000000000..c89de7f4e5b9 --- /dev/null +++ b/include/linux/wimax.h @@ -0,0 +1,234 @@ +/* + * Linux WiMax + * API for user space + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This file declares the user/kernel protocol that is spoken over + * Generic Netlink, as well as any type declaration that is to be used + * by kernel and user space. + * + * It is intended for user space to clone it verbatim to use it as a + * primary reference for definitions. + * + * Stuff intended for kernel usage as well as full protocol and stack + * documentation is rooted in include/net/wimax.h. + */ + +#ifndef __LINUX__WIMAX_H__ +#define __LINUX__WIMAX_H__ + +#include + +enum { + /** + * Version of the interface (unsigned decimal, MMm, max 25.5) + * M - Major: change if removing or modifying an existing call. + * m - minor: change when adding a new call + */ + WIMAX_GNL_VERSION = 00, + /* Generic NetLink attributes */ + WIMAX_GNL_ATTR_INVALID = 0x00, + WIMAX_GNL_ATTR_MAX = 10, +}; + + +/* + * Generic NetLink operations + * + * Most of these map to an API call; _OP_ stands for operation, _RP_ + * for reply and _RE_ for report (aka: signal). 
+ */ +enum { + WIMAX_GNL_OP_MSG_FROM_USER, /* User to kernel message */ + WIMAX_GNL_OP_MSG_TO_USER, /* Kernel to user message */ + WIMAX_GNL_OP_RFKILL, /* Run wimax_rfkill() */ + WIMAX_GNL_OP_RESET, /* Run wimax_reset() */ + WIMAX_GNL_RE_STATE_CHANGE, /* Report: status change */ +}; + + +/* Message from user / to user */ +enum { + WIMAX_GNL_MSG_IFIDX = 1, + WIMAX_GNL_MSG_PIPE_NAME, + WIMAX_GNL_MSG_DATA, +}; + + +/* + * wimax_rfkill() + * + * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's + * switch state (DISABLED/ENABLED). + */ +enum wimax_rf_state { + WIMAX_RF_OFF = 0, /* Radio is off, rfkill on/enabled */ + WIMAX_RF_ON = 1, /* Radio is on, rfkill off/disabled */ + WIMAX_RF_QUERY = 2, +}; + +/* Attributes */ +enum { + WIMAX_GNL_RFKILL_IFIDX = 1, + WIMAX_GNL_RFKILL_STATE, +}; + + +/* Attributes for wimax_reset() */ +enum { + WIMAX_GNL_RESET_IFIDX = 1, +}; + + +/* + * Attributes for the Report State Change + * + * For now we just have the old and new states; new attributes might + * be added later on. + */ +enum { + WIMAX_GNL_STCH_IFIDX = 1, + WIMAX_GNL_STCH_STATE_OLD, + WIMAX_GNL_STCH_STATE_NEW, +}; + + +/** + * enum wimax_st - The different states of a WiMAX device + * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed, + * but still wimax_dev_add() hasn't been called. There is no state. + * + * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and + * networking stacks, but it is not initialized (normally that is + * done with 'ifconfig DEV up' [or equivalent], which can upload + * firmware and enable communications with the device). + * In this state, the device is powered down and using as little + * power as possible. + * This state is the default after a call to wimax_dev_add(). It + * is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED + * or %WIMAX_ST_RADIO_OFF in _probe() after the call to + * wimax_dev_add().
+ * It is recommended that the driver leaves this state when + * calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV + * down'. + * + * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API + * operations are allowed to proceed except the ones needed to + * complete the device clean up process. + * + * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device + * is set up, but the device still requires some configuration + * before being operational. + * Some WiMAX API calls might work. + * + * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (whether + * by hardware or software switches). + * It is recommended to always leave the device in this state + * after initialization. + * + * @WIMAX_ST_READY: The device is fully up and radio is on. + * + * @WIMAX_ST_SCANNING: [optional] The device has been instructed to + * scan. In this state, the device cannot be actively connected to + * a network. + * + * @WIMAX_ST_CONNECTING: The device is connecting to a network. This + * state exists because in some devices, the connect process can + * include a number of negotiations between user space, kernel + * space and the device. User space needs to know what the device + * is doing. If the connect sequence in a device is atomic and + * fast, the device can transition directly to CONNECTED + * + * @WIMAX_ST_CONNECTED: The device is connected to a network. + * + * @__WIMAX_ST_INVALID: This is an invalid state used to mark the + * maximum numeric value of states. + * + * Description: + * + * Transitions from one state to another one are atomic and can only + * be caused in kernel space with wimax_state_change(). To read the + * state, use wimax_state_get(). + * + * States starting with __ are internal and shall not be used or + * referred to by drivers or userspace. They look ugly, but that's the + * point -- if any use is made non-internal to the stack, it is easier + * to catch on review.
+ * + * All API operations [with well defined exceptions] will take the + * device mutex before starting and then check the state. If the state + * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or + * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with + * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN. + * + * The order of the definitions is important, so we can do numerical + * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready + * to operate). + */ +/* + * The allowed state transitions are described in the table below + * (states in rows can go to states in columns where there is an X): + * + * UNINI RADIO READY SCAN CONNEC CONNEC + * NULL DOWN QUIESCING TIALIZED OFF NING TING TED + * NULL - x + * DOWN - x x x + * QUIESCING x - + * UNINITIALIZED x - x + * RADIO_OFF x - x + * READY x x - x x x + * SCANNING x x x - x x + * CONNECTING x x x x - x + * CONNECTED x x x - + * + * This table not available in kernel-doc because the formatting messes it up. + */ + enum wimax_st { + __WIMAX_ST_NULL = 0, + WIMAX_ST_DOWN, + __WIMAX_ST_QUIESCING, + WIMAX_ST_UNINITIALIZED, + WIMAX_ST_RADIO_OFF, + WIMAX_ST_READY, + WIMAX_ST_SCANNING, + WIMAX_ST_CONNECTING, + WIMAX_ST_CONNECTED, + __WIMAX_ST_INVALID /* Always keep last */ +}; + + +#endif /* #ifndef __LINUX__WIMAX_H__ */ -- cgit v1.2.3 From ea912f4e7f264981faf8665cfb63d46d7f948117 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:35 -0800 Subject: wimax: debug macros and debug settings for the WiMAX stack This file contains a simple debug framework that is used in the stack; it allows the debug level to be controlled at compile-time (so the debug code is optimized out) and at run-time (for what wasn't compiled out). This is eventually going to be moved to use dynamic_printk(). Just need to find time to do it. 
Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/debug.h | 453 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100644 include/linux/wimax/debug.h (limited to 'include/linux') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h new file mode 100644 index 000000000000..ba0c49399a83 --- /dev/null +++ b/include/linux/wimax/debug.h @@ -0,0 +1,453 @@ +/* + * Linux WiMAX + * Collection of tools to manage debug operations. + * + * + * Copyright (C) 2005-2007 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * Don't #include this file directly, read on! + * + * + * EXECUTING DEBUGGING ACTIONS OR NOT + * + * The main thing this framework provides is decision power to take a + * debug action (like printing a message) if the current debug level + * allows it. + * + * The decision power is at two levels: at compile-time (what does + * not make it is compiled out) and at run-time. The run-time + * selection is done per-submodule (as they are declared by the user + * of the framework). + * + * A call to d_test(L) (L being the target debug level) returns true + * if the action should be taken because the current debug levels + * allow it (both compile and run time).
+ * + * It follows that a call to d_test() that can be determined to be + * always false at compile time will get the code depending on it + * compiled out by optimization. + * + * + * DEBUG LEVELS + * + * It is up to the caller to define how much a debugging level is. + * + * Convention sets 0 as "no debug" (so an action marked as debug level 0 + * will always be taken). The increasing debug levels are used for + * increased verbosity. + * + * + * USAGE + * + * Group the code in modules and submodules inside each module [which + * in most cases maps to Linux modules and .c files that compose + * those]. + * + * + * For each module, there is: + * + * - a MODULENAME (single word, legal C identifier) + * + * - a debug-levels.h header file that declares the list of + * submodules and that is included by all .c files that use + * the debugging tools. The file name can be anything. + * + * - some (optional) .c code to manipulate the runtime debug levels + * through debugfs. + * + * The debug-levels.h file would look like: + * + * #ifndef __debug_levels__h__ + * #define __debug_levels__h__ + * + * #define D_MODULENAME modulename + * #define D_MASTER 10 + * + * #include <linux/wimax/debug.h> + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * ... + * D_SUBMODULE_DECLARE(submodule_N) + * }; + * + * #endif + * + * D_MASTER is the maximum compile-time debug level; any debug actions + * above this will be compiled out. D_MODULENAME is the module name (legal C + * identifier), which has to be unique for each module (to avoid + * namespace collisions during linkage). Note those #defines need to + * be done before #including debug.h + * + * We declare N different submodules whose debug level can be + * independently controlled during runtime. + * + * In a .c file of the module (and only in one of them), define the + * following code: + * + * struct d_level D_LEVEL[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * ...
+ * D_SUBMODULE_DEFINE(submodule_N), + * }; + * size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + * + * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used + * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros + * #defined also in this file. + * + * To manipulate from user space the levels, create a debugfs dentry + * and then register each submodule with: + * + * result = d_level_register_debugfs("PREFIX_", submodule_X, parent); + * if (result < 0) + * goto error; + * + * Where PREFIX_ is a name of your choosing. This will create a debugfs + * file with a single numeric value that can be used to tweak it. To + * remove the entries, just use debugfs_remove_recursive() on 'parent'. + * + * NOTE: remember that even if this will show attached to some + * particular instance of a device, the settings are *global*. + * + * + * On each submodule (for example, .c files), the debug infrastructure + * should be included like this: + * + * #define D_SUBMODULE submodule_x // matches one in debug-levels.h + * #include "debug-levels.h" + * + * after #including all your include files. + * + * + * Now you can use the d_*() macros below [d_test(), d_fnstart(), + * d_fnend(), d_printf(), d_dump()]. + * + * If their debug level is greater than D_MASTER, they will be + * compiled out. + * + * If their debug level is lower than or equal to D_MASTER but greater + * than the current debug level of their submodule, they'll be + * ignored. + * + * Otherwise, the action will be performed. + */ +#ifndef __debug__h__ +#define __debug__h__ + +#include <linux/types.h> +#include <linux/device.h> + + +/* Backend stuff */ + +/* + * Debug backend: generate a message header from a 'struct device' + * + * @head: buffer where to place the header + * @head_size: length of @head + * @dev: pointer to device used to generate a header from. If NULL, + * an empty ("") header is generated.
+ */ +static inline +void __d_head(char *head, size_t head_size, + struct device *dev) +{ + if (dev == NULL) + head[0] = 0; + else if ((unsigned long)dev < 4096) { + printk(KERN_ERR "E: Corrupt dev %p\n", dev); + WARN_ON(1); + } else + snprintf(head, head_size, "%s %s: ", + dev_driver_string(dev), dev->bus_id); +} + + +/* + * Debug backend: log some message if debugging is enabled + * + * @l: intended debug level + * @tag: tag to prefix the message with + * @dev: 'struct device' associated to this message + * @f: printf-like format and arguments + * + * Note this is optimized out if it doesn't pass the compile-time + * check; however, it is *always* compiled. This is useful to make + * sure the printf-like formats and variables are always checked and + * they don't get bit rot if you have all the debugging disabled. + */ +#define _d_printf(l, tag, dev, f, a...) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a); \ +} while (0) + + +/* + * CPP syntactic sugar to generate A_B like symbol names when one of + * the arguments is a preprocessor #define. + */ +#define __D_PASTE__(varname, modulename) varname##_##modulename +#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) +#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name)) + + +/* + * Store a submodule's runtime debug level and name + */ +struct d_level { + u8 level; + const char *name; +}; + + +/* + * List of available submodules and their debug levels + * + * We call them d_level_MODULENAME and d_level_size_MODULENAME; the + * macros D_LEVEL and D_LEVEL_SIZE contain the name already for + * convenience. + * + * This array and the size are defined on some .c file that is part of + * the current module.
+ */ +#define D_LEVEL __D_PASTE(d_level, D_MODULENAME) +#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME) + +extern struct d_level D_LEVEL[]; +extern size_t D_LEVEL_SIZE; + + +/* + * Frontend stuff + * + * + * Stuff you need to declare prior to using the actual "debug" actions + * (defined below). + */ + +#ifndef D_MODULENAME +#error D_MODULENAME is not defined in your debug-levels.h file +/** + * D_MODULE - Name of the current module + * + * #define in your module's debug-levels.h, making sure it is + * unique. This has to be a legal C identifier. + */ +#define D_MODULENAME undefined_modulename +#endif + + +#ifndef D_MASTER +#warning D_MASTER not defined, but debug.h included! [see docs] +/** + * D_MASTER - Compile time maximum debug level + * + * #define in your debug-levels.h file to the maximum debug level the + * runtime code will be allowed to have. This allows you to provide a + * main knob. + * + * Anything above that level will be optimized out of the compile. + * + * Defaults to zero (no debug code compiled in). + * + * Maximum one definition per module (at the debug-levels.h file). + */ +#define D_MASTER 0 +#endif + +#ifndef D_SUBMODULE +#error D_SUBMODULE not defined, but debug.h included! [see docs] +/** + * D_SUBMODULE - Name of the current submodule + * + * #define in your submodule .c file before #including debug-levels.h + * to the name of the current submodule as previously declared and + * defined with D_SUBMODULE_DECLARE() (in your module's + * debug-levels.h) and D_SUBMODULE_DEFINE(). + * + * This is used to provide runtime-control over the debug levels. + * + * Maximum one per .c file! Can be shared among different .c files + * (meaning they belong to the same submodule categorization). 
+ */ +#define D_SUBMODULE undefined_module +#endif + + +/** + * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Declare in the module's debug-levels.h header file as: + * + * enum d_module { + * D_SUBMODULE_DECLARE(submodule_1), + * D_SUBMODULE_DECLARE(submodule_2), + * D_SUBMODULE_DECLARE(submodule_3), + * }; + * + * Some corresponding .c file needs to have a matching + * D_SUBMODULE_DEFINE(). + */ +#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name + + +/** + * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control + * + * @_name: name of the submodule, restricted to the chars that make up a + * valid C identifier ([a-zA-Z0-9_]). + * + * Use once per module (in some .c file) as: + * + * static + * struct d_level d_level_SUBMODULENAME[] = { + * D_SUBMODULE_DEFINE(submodule_1), + * D_SUBMODULE_DEFINE(submodule_2), + * D_SUBMODULE_DEFINE(submodule_3), + * }; + * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME); + * + * Matching D_SUBMODULE_DECLARE()s have to be present in a + * debug-levels.h header file. + */ +#define D_SUBMODULE_DEFINE(_name) \ +[__D_SUBMODULE_##_name] = { \ + .level = 0, \ + .name = #_name \ +} + + + +/* The actual "debug" operations */ + + +/** + * d_test - Returns true if debugging should be enabled + * + * @l: intended debug level (unsigned) + * + * If the master debug switch is enabled and the current settings are + * higher or equal to the requested level, then debugging + * output/actions should be enabled. + * + * NOTE: + * + * This needs to be coded so that it can be evaluated in compile + * time; this is why the ugly BUG_ON() is placed in there, so the + * D_MASTER evaluation compiles all out if it is compile-time false. 
+ */ +#define d_test(l) \ +({ \ + unsigned __l = l; /* type enforcer */ \ + (D_MASTER) >= __l \ + && ({ \ + BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\ + D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l; \ + }); \ +}) + + +/** + * d_fnstart - log message at function start if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a) + + +/** + * d_fnend - log message at function end if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a) + + +/** + * d_printf - log message if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a) + + +/** + * d_dump - log buffer hex dump if debugging enabled + * + * @l: intended debug level + * @_dev: 'struct device' pointer, NULL if none (for context) + * @f: printf-like format and arguments + */ +#define d_dump(l, dev, ptr, size) \ +do { \ + char head[64]; \ + if (!d_test(l)) \ + break; \ + __d_head(head, sizeof(head), dev); \ + print_hex_dump(KERN_ERR, head, 0, 16, 1, \ + ((void *) ptr), (size), 0); \ +} while (0) + + +/** + * Export a submodule's debug level over debugfs as PREFIXSUBMODULE + * + * @prefix: string to prefix the name with + * @submodule: name of submodule (not a string, just the name) + * @dentry: debugfs parent dentry + * + * Returns: 0 if ok, < 0 errno on error. + * + * For removing, just use debugfs_remove_recursive() on the parent. 
+ */ +#define d_level_register_debugfs(prefix, name, parent) \ +({ \ + int rc; \ + struct dentry *fd; \ + struct dentry *verify_parent_type = parent; \ + fd = debugfs_create_u8( \ + prefix #name, 0600, verify_parent_type, \ + &(D_LEVEL[__D_SUBMODULE_ ## name].level)); \ + rc = PTR_ERR(fd); \ + if (IS_ERR(fd) && rc != -ENODEV) \ + printk(KERN_ERR "%s: Can't create debugfs entry %s: " \ + "%d\n", __func__, prefix #name, rc); \ + else \ + rc = 0; \ + rc; \ +}) + + +#endif /* #ifndef __debug__h__ */ -- cgit v1.2.3 From ea24652d253eabfb83e955e55ce032228d9d99b9 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:43 -0800 Subject: i2400m: host/device procotol and core driver definitions The wimax/i2400m.h defines the structures and constants for the host-device protocols: - boot / firmware upload protocol - general data transport protocol - control protocol It is done in such a way that can also be used verbatim by user space. drivers/net/wimax/i2400m.h defines all the APIs used by the core, bus-generic driver (i2400m) and the bus specific drivers (i2400m-BUSNAME). It also gives a roadmap to the driver implementation. debug-levels.h adds the core driver's debug settings. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/wimax/i2400m.h | 512 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100644 include/linux/wimax/i2400m.h (limited to 'include/linux') diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h new file mode 100644 index 000000000000..74198f5bb4dc --- /dev/null +++ b/include/linux/wimax/i2400m.h @@ -0,0 +1,512 @@ +/* + * Intel Wireless WiMax Connection 2400m + * Host-Device protocol interface definitions + * + * + * Copyright (C) 2007-2008 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Intel Corporation + * Inaky Perez-Gonzalez + * - Initial implementation + * + * + * This header defines the data structures and constants used to + * communicate with the device. + * + * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL + * + * The firmware upload protocol is quite simple and only requires a + * handful of commands. See drivers/net/wimax/i2400m/fw.c for more + * details. + * + * The BCF data structure is for the firmware file header. 
+ * + * + * THE DATA / CONTROL PROTOCOL + * + * This is the normal protocol spoken with the device once the + * firmware is uploaded. It transports data payloads and control + * messages back and forth. + * + * It consists of 'messages' that pack one or more payloads each. The + * format is described in detail in drivers/net/wimax/i2400m/rx.c and + * tx.c. + * + * + * THE L3L4 PROTOCOL + * + * The term L3L4 refers to Layer 3 (the device), Layer 4 (the + * driver/host software). + * + * This is the control protocol used by the host to control the i2400m + * device (scan, connect, disconnect...). This is sent to / received + * as control frames. These frames consist of a header and zero or + * more TLVs with information. We call each control frame a "message". + * + * Each message is composed of: + * + * HEADER + * [TLV0 + PAYLOAD0] + * [TLV1 + PAYLOAD1] + * [...] + * [TLVN + PAYLOADN] + * + * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are + * defined by a TLV structure (Type Length Value) which is a 'header' + * (struct i2400m_tlv_hdr) and then the payload. + * + * All integers are represented as Little Endian. + * + * - REQUESTS AND EVENTS + * + * The requests can be classified as follows: + * + * COMMAND: implies a request from the host to the device requesting + * an action being performed. The device will reply with a + * message (with the same type as the command), status and + * no (TLV) payload. Execution of a command might cause + * events (of different type) to be sent later on as + * device's state changes. + * + * GET/SET: similar to COMMAND, but will not cause other + * EVENTs. The reply, in the case of GET, will contain + * TLVs with the requested information. + * + * EVENT: asynchronous messages sent from the device, maybe as a + * consequence of previous COMMANDs but disassociated from + * them.
+ * + * Only one request might be pending at the same time (ie: don't + * parallelize nor post another GET request before the previous + * COMMAND has been acknowledged with its corresponding reply by the + * device). + * + * The different requests and their formats are described below: + * + * I2400M_MT_* Message types + * I2400M_MS_* Message status (for replies, events) + * i2400m_tlv_* TLVs + * + * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the + * operation. + */ + +#ifndef __LINUX__WIMAX__I2400M_H__ +#define __LINUX__WIMAX__I2400M_H__ + +#include <linux/types.h> + + +/* + * Host Device Interface (HDI) common to all busses + */ + +/* Boot-mode (firmware upload mode) commands */ + +/* Header for the firmware file */ +struct i2400m_bcf_hdr { + __le32 module_type; + __le32 header_len; + __le32 header_version; + __le32 module_id; + __le32 module_vendor; + __le32 date; /* BCD YYYMMDD */ + __le32 size; + __le32 key_size; /* in dwords */ + __le32 modulus_size; /* in dwords */ + __le32 exponent_size; /* in dwords */ + __u8 reserved[88]; +} __attribute__ ((packed)); + +/* Boot mode opcodes */ +enum i2400m_brh_opcode { + I2400M_BRH_READ = 1, + I2400M_BRH_WRITE = 2, + I2400M_BRH_JUMP = 3, + I2400M_BRH_SIGNED_JUMP = 8, + I2400M_BRH_HASH_PAYLOAD_ONLY = 9, +}; + +/* Boot mode command masks and stuff */ +enum i2400m_brh { + I2400M_BRH_SIGNATURE = 0xcbbc0000, + I2400M_BRH_SIGNATURE_MASK = 0xffff0000, + I2400M_BRH_SIGNATURE_SHIFT = 16, + I2400M_BRH_OPCODE_MASK = 0x0000000f, + I2400M_BRH_RESPONSE_MASK = 0x000000f0, + I2400M_BRH_RESPONSE_SHIFT = 4, + I2400M_BRH_DIRECT_ACCESS = 0x00000400, + I2400M_BRH_RESPONSE_REQUIRED = 0x00000200, + I2400M_BRH_USE_CHECKSUM = 0x00000100, +}; + + +/* Constants for bcf->module_id */ +enum i2400m_bcf_mod_id { + /* Firmware file carries its own pokes -- pokes are a set of + * magical values that have to be written in certain memory + * addresses to get the device up and ready for firmware + * download when it is in non-signed boot mode.
*/ + I2400M_BCF_MOD_ID_POKES = 0x000000001, +}; + + +/** + * i2400m_bootrom_header - Header for a boot-mode command + * + * @cmd: the above command descriptor + * @target_addr: where on the device memory should the action be performed. + * @data_size: for read/write, amount of data to be read/written + * @block_checksum: checksum value (if applicable) + * @payload: the beginning of data attached to this header + */ +struct i2400m_bootrom_header { + __le32 command; /* Compose with enum i2400_brh */ + __le32 target_addr; + __le32 data_size; + __le32 block_checksum; + char payload[0]; +} __attribute__ ((packed)); + + +/* + * Data / control protocol + */ + +/* Packet types for the host-device interface */ +enum i2400m_pt { + I2400M_PT_DATA = 0, + I2400M_PT_CTRL, + I2400M_PT_TRACE, /* For device debug */ + I2400M_PT_RESET_WARM, /* device reset */ + I2400M_PT_RESET_COLD, /* USB[transport] reset, like reconnect */ + I2400M_PT_ILLEGAL +}; + + +/* + * Payload for a data packet + * + * This is prefixed to each and every outgoing DATA type. + */ +struct i2400m_pl_data_hdr { + __le32 reserved; +} __attribute__((packed)); + + +/* Misc constants */ +enum { + I2400M_PL_PAD = 16, /* Payload data size alignment */ + I2400M_PL_SIZE_MAX = 0x3EFF, + I2400M_MAX_PLS_IN_MSG = 60, + /* protocol barkers: sync sequences; for notifications they + * are sent in groups of four. */ + I2400M_H2D_PREVIEW_BARKER = 0xcafe900d, + I2400M_COLD_RESET_BARKER = 0xc01dc01d, + I2400M_WARM_RESET_BARKER = 0x50f750f7, + I2400M_NBOOT_BARKER = 0xdeadbeef, + I2400M_SBOOT_BARKER = 0x0ff1c1a1, + I2400M_ACK_BARKER = 0xfeedbabe, + I2400M_D2H_MSG_BARKER = 0xbeefbabe, +}; + + +/* + * Hardware payload descriptor + * + * Bitfields encoded in a struct to enforce typing semantics. + * + * Look in rx.c and tx.c for a full description of the format. 
+ */ +struct i2400m_pld { + __le32 val; +} __attribute__ ((packed)); + +#define I2400M_PLD_SIZE_MASK 0x00003fff +#define I2400M_PLD_TYPE_SHIFT 16 +#define I2400M_PLD_TYPE_MASK 0x000f0000 + +/* + * Header for a TX message or RX message + * + * @barker: preamble + * @size: used for management of the FIFO queue buffer; before + * sending, this is converted to be a real preamble. This + * indicates the real size of the TX message that starts at this + * point. If the highest bit is set, then this message is to be + * skipped. + * @sequence: sequence number of this message + * @offset: offset where the message itself starts -- see the comments + * in the file header about message header and payload descriptor + * alignment. + * @num_pls: number of payloads in this message + * @padding: amount of padding bytes at the end of the message to make + * it be of block-size aligned + * + * Look in rx.c and tx.c for a full description of the format. + */ +struct i2400m_msg_hdr { + union { + __le32 barker; + __u32 size; /* same size type as barker!! */ + }; + union { + __le32 sequence; + __u32 offset; /* same size type as barker!! 
*/ + }; + __le16 num_pls; + __le16 rsv1; + __le16 padding; + __le16 rsv2; + struct i2400m_pld pld[0]; +} __attribute__ ((packed)); + + + +/* + * L3/L4 control protocol + */ + +enum { + /* Interface version */ + I2400M_L3L4_VERSION = 0x0100, +}; + +/* Message types */ +enum i2400m_mt { + I2400M_MT_RESERVED = 0x0000, + I2400M_MT_INVALID = 0xffff, + I2400M_MT_REPORT_MASK = 0x8000, + + I2400M_MT_GET_SCAN_RESULT = 0x4202, + I2400M_MT_SET_SCAN_PARAM = 0x4402, + I2400M_MT_CMD_RF_CONTROL = 0x4602, + I2400M_MT_CMD_SCAN = 0x4603, + I2400M_MT_CMD_CONNECT = 0x4604, + I2400M_MT_CMD_DISCONNECT = 0x4605, + I2400M_MT_CMD_EXIT_IDLE = 0x4606, + I2400M_MT_GET_LM_VERSION = 0x5201, + I2400M_MT_GET_DEVICE_INFO = 0x5202, + I2400M_MT_GET_LINK_STATUS = 0x5203, + I2400M_MT_GET_STATISTICS = 0x5204, + I2400M_MT_GET_STATE = 0x5205, + I2400M_MT_GET_MEDIA_STATUS = 0x5206, + I2400M_MT_SET_INIT_CONFIG = 0x5404, + I2400M_MT_CMD_INIT = 0x5601, + I2400M_MT_CMD_TERMINATE = 0x5602, + I2400M_MT_CMD_MODE_OF_OP = 0x5603, + I2400M_MT_CMD_RESET_DEVICE = 0x5604, + I2400M_MT_CMD_MONITOR_CONTROL = 0x5605, + I2400M_MT_CMD_ENTER_POWERSAVE = 0x5606, + I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201, + I2400M_MT_SET_EAP_SUCCESS = 0x6402, + I2400M_MT_SET_EAP_FAIL = 0x6403, + I2400M_MT_SET_EAP_KEY = 0x6404, + I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602, + I2400M_MT_REPORT_SCAN_RESULT = 0xc002, + I2400M_MT_REPORT_STATE = 0xd002, + I2400M_MT_REPORT_POWERSAVE_READY = 0xd005, + I2400M_MT_REPORT_EAP_REQUEST = 0xe002, + I2400M_MT_REPORT_EAP_RESTART = 0xe003, + I2400M_MT_REPORT_ALT_ACCEPT = 0xe004, + I2400M_MT_REPORT_KEY_REQUEST = 0xe005, +}; + + +/* + * Message Ack Status codes + * + * When a message is replied-to, this status is reported. 
+ */ +enum i2400m_ms { + I2400M_MS_DONE_OK = 0, + I2400M_MS_DONE_IN_PROGRESS = 1, + I2400M_MS_INVALID_OP = 2, + I2400M_MS_BAD_STATE = 3, + I2400M_MS_ILLEGAL_VALUE = 4, + I2400M_MS_MISSING_PARAMS = 5, + I2400M_MS_VERSION_ERROR = 6, + I2400M_MS_ACCESSIBILITY_ERROR = 7, + I2400M_MS_BUSY = 8, + I2400M_MS_CORRUPTED_TLV = 9, + I2400M_MS_UNINITIALIZED = 10, + I2400M_MS_UNKNOWN_ERROR = 11, + I2400M_MS_PRODUCTION_ERROR = 12, + I2400M_MS_NO_RF = 13, + I2400M_MS_NOT_READY_FOR_POWERSAVE = 14, + I2400M_MS_THERMAL_CRITICAL = 15, + I2400M_MS_MAX +}; + + +/** + * i2400m_tlv - enumeration of the different types of TLVs + * + * TLVs stand for type-length-value and are the header for a payload + * composed of almost anything. Each payload has a type assigned + * and a length. + */ +enum i2400m_tlv { + I2400M_TLV_L4_MESSAGE_VERSIONS = 129, + I2400M_TLV_SYSTEM_STATE = 141, + I2400M_TLV_MEDIA_STATUS = 161, + I2400M_TLV_RF_OPERATION = 162, + I2400M_TLV_RF_STATUS = 163, + I2400M_TLV_DEVICE_RESET_TYPE = 132, + I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601, +}; + + +struct i2400m_tlv_hdr { + __le16 type; + __le16 length; /* payload's */ + __u8 pl[0]; +} __attribute__((packed)); + + +struct i2400m_l3l4_hdr { + __le16 type; + __le16 length; /* payload's */ + __le16 version; + __le16 resv1; + __le16 status; + __le16 resv2; + struct i2400m_tlv_hdr pl[0]; +} __attribute__((packed)); + + +/** + * i2400m_system_state - different states of the device + */ +enum i2400m_system_state { + I2400M_SS_UNINITIALIZED = 1, + I2400M_SS_INIT, + I2400M_SS_READY, + I2400M_SS_SCAN, + I2400M_SS_STANDBY, + I2400M_SS_CONNECTING, + I2400M_SS_WIMAX_CONNECTED, + I2400M_SS_DATA_PATH_CONNECTED, + I2400M_SS_IDLE, + I2400M_SS_DISCONNECTING, + I2400M_SS_OUT_OF_ZONE, + I2400M_SS_SLEEPACTIVE, + I2400M_SS_PRODUCTION, + I2400M_SS_CONFIG, + I2400M_SS_RF_OFF, + I2400M_SS_RF_SHUTDOWN, + I2400M_SS_DEVICE_DISCONNECT, + I2400M_SS_MAX, +}; + + +/** + * i2400m_tlv_system_state - report on the state of the system + * + * @state: see enum 
i2400m_system_state + */ +struct i2400m_tlv_system_state { + struct i2400m_tlv_hdr hdr; + __le32 state; +} __attribute__((packed)); + + +struct i2400m_tlv_l4_message_versions { + struct i2400m_tlv_hdr hdr; + __le16 major; + __le16 minor; + __le16 branch; + __le16 reserved; +} __attribute__((packed)); + + +struct i2400m_tlv_detailed_device_info { + struct i2400m_tlv_hdr hdr; + __u8 reserved1[400]; + __u8 mac_address[6]; + __u8 reserved2[2]; +} __attribute__((packed)); + + +enum i2400m_rf_switch_status { + I2400M_RF_SWITCH_ON = 1, + I2400M_RF_SWITCH_OFF = 2, +}; + +struct i2400m_tlv_rf_switches_status { + struct i2400m_tlv_hdr hdr; + __u8 sw_rf_switch; /* 1 ON, 2 OFF */ + __u8 hw_rf_switch; /* 1 ON, 2 OFF */ + __u8 reserved[2]; +} __attribute__((packed)); + + +enum { + i2400m_rf_operation_on = 1, + i2400m_rf_operation_off = 2 +}; + +struct i2400m_tlv_rf_operation { + struct i2400m_tlv_hdr hdr; + __le32 status; /* 1 ON, 2 OFF */ +} __attribute__((packed)); + + +enum i2400m_tlv_reset_type { + I2400M_RESET_TYPE_COLD = 1, + I2400M_RESET_TYPE_WARM +}; + +struct i2400m_tlv_device_reset_type { + struct i2400m_tlv_hdr hdr; + __le32 reset_type; +} __attribute__((packed)); + + +struct i2400m_tlv_config_idle_parameters { + struct i2400m_tlv_hdr hdr; + __le32 idle_timeout; /* 100 to 300000 ms [5min], 100 increments + * 0 disabled */ + __le32 idle_paging_interval; /* frames */ +} __attribute__((packed)); + + +enum i2400m_media_status { + I2400M_MEDIA_STATUS_LINK_UP = 1, + I2400M_MEDIA_STATUS_LINK_DOWN, + I2400M_MEDIA_STATUS_LINK_RENEW, +}; + +struct i2400m_tlv_media_status { + struct i2400m_tlv_hdr hdr; + __le32 media_status; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */ -- cgit v1.2.3 From e30698743419d20dce03d033761f203b4d847ab0 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 20 Dec 2008 16:57:59 -0800 Subject: wimax: export linux/wimax.h and linux/wimax/i2400m.h with headers_install These two files are what user space can use to 
establish communication with the WiMAX kernel API and to speak the Intel 2400m Wireless WiMAX connection's control protocol. Signed-off-by: Inaky Perez-Gonzalez Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/Kbuild | 2 ++ include/linux/wimax/Kbuild | 1 + 2 files changed, 3 insertions(+) create mode 100644 include/linux/wimax/Kbuild (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index a3323f337e4d..12e9a2957caf 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -371,3 +371,5 @@ unifdef-y += xattr.h unifdef-y += xfrm.h objhdr-y += version.h +header-y += wimax.h +header-y += wimax/ diff --git a/include/linux/wimax/Kbuild b/include/linux/wimax/Kbuild new file mode 100644 index 000000000000..3cb4f269bb09 --- /dev/null +++ b/include/linux/wimax/Kbuild @@ -0,0 +1 @@ +header-y += i2400m.h -- cgit v1.2.3 From f7b7baae6b30ff04124259ff8d7c0c0d281320e6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 11 Nov 2008 17:17:46 +0800 Subject: PCI: add PCI Advanced Feature Capability defines PCI Advanced Features Capability is introduced by "Conventional PCI Advanced Caps ECN" (can be downloaded in pcisig.com). Add defines for the various AF capabilities, including function level reset (FLR). 
Reviewed-by: Matthew Wilcox Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e5effd47ed74..7766488470e4 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -210,6 +210,7 @@ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ #define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ #define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ #define PCI_CAP_SIZEOF 4 @@ -316,6 +317,17 @@ #define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ #define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 + /* PCI-X registers */ #define PCI_X_CMD 2 /* Modes & Features */ -- cgit v1.2.3 From 8b62091e20215730be1b94b7cd135a78a3e692ca Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:40 -0700 Subject: ACPI/PCI: include missing acpi.h file in pci-acpi.h. The pci-acpi.h file will not compile without including linux/acpi.h. 
Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 8837928fbf33..a9e4c34e9389 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -8,6 +8,8 @@ #ifndef _PCI_ACPI_H_ #define _PCI_ACPI_H_ +#include <linux/acpi.h> + #define OSC_QUERY_TYPE 0 #define OSC_SUPPORT_TYPE 1 #define OSC_CONTROL_TYPE 2 -- cgit v1.2.3 From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:45 -0700 Subject: ACPI/PCI: call _OSC support during root bridge discovery Add pci_acpi_osc_support() and call it when a PCI bridge is added. This allows us to avoid having every individual PCI root bridge driver call _OSC support for every root bridge in their probe functions, a significant savings in boot time. 
This adds the function pci_ext_cfg_avail which returns true if we can access PCI extended config space (offset greater than 0xff). It currently only returns false if arch=x86 and raw_pci_ext_ops is not set (which might happen if pci=nommcfg is set on the kernel command-line). Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4bb156ba854a..6fd47654ca4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +int pci_ext_cfg_avail(struct pci_dev *dev); + #ifdef CONFIG_HAS_IOMEM static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { -- cgit v1.2.3 From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:55 -0700 Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge added The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the ASPM driver. Also add the function pcie_aspm_enabled, which returns true if pcie_aspm=off is not on the kernel command-line. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6fd47654ca4e..eae97a2bf603 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); #endif +#ifndef CONFIG_PCIEASPM +static inline int pcie_aspm_enabled(void) +{ + return 0; +} +#else +extern int pcie_aspm_enabled(void); +#endif + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit v1.2.3 From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:05 -0700 Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge added The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the PCI MSI driver. Also adds the function pci_msi_enabled, which returns true if pci=nomsi is not on the kernel command-line. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eae97a2bf603..59a3dc2059d3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) static inline void pci_restore_msi_state(struct pci_dev *dev) { } +static inline int pci_msi_enabled(void) +{ + return 0; +} #else extern int pci_enable_msi(struct pci_dev *dev); extern void pci_msi_shutdown(struct pci_dev *dev); @@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev); extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); +extern int pci_msi_enabled(void); #endif #ifndef CONFIG_PCIEASPM -- cgit v1.2.3 From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:10 -0700 Subject: ACPI/PCI: remove obsolete _OSC capability support functions The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and pcie_osc_support_set functions have been obsoleted in favor of setting these capabilities during root bridge discovery with pci_acpi_osc_support. There are no longer any callers of these functions, so remove them. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 424f06f84cab..871e096e0fbc 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -50,16 +50,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); int pci_acpi_osc_support(acpi_handle handle, u32 flags); -static inline acpi_status pci_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -} -static inline acpi_status pcie_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING); -} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ @@ -76,8 +67,6 @@ typedef u32 acpi_status; #endif static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) {return AE_ERROR;} -static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} -static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif -- cgit v1.2.3 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. 
This patch adds the option for device drivers to add their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, driver issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- include/linux/ioport.h | 11 ++++++++--- include/linux/pci.h | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2bf..f6bb2ca8e3ba 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, 
newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d3..bfcb39ca8879 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ -- cgit v1.2.3 From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Dec 2008 11:24:23 -0700 Subject: PCI: add pci_swizzle_interrupt_pin() This patch adds pci_swizzle_interrupt_pin(), which implements the INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI Bridge Architecture 
Specification," revision 1.2. There are many architecture-specific implementations of this swizzle that can be replaced by this common one. Reviewed-by: David Howells Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bfcb39ca8879..58357d14f94c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); -- cgit v1.2.3 From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 1 Dec 2008 14:30:30 -0800 Subject: PCI: uninline pci_ioremap_bar() It's too large to be inlined. 
Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- include/linux/pci.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 58357d14f94c..0d8bc920c2e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { } int pci_ext_cfg_avail(struct pci_dev *dev); -#ifdef CONFIG_HAS_IOMEM -static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) -{ - /* - * Make sure the BAR is actually a memory resource, not an IO resource - */ - if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { - WARN_ON(1); - return NULL; - } - return ioremap_nocache(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); -} -#endif +void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:38:52 +0800 Subject: PCI: remove unnecessary arg of pci_update_resource() This cleanup removes unnecessary argument 'struct resource *res' in pci_update_resource(), so it takes same arguments as other companion functions (pci_assign_resource(), etc.). 
Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0d8bc920c2e5..c5e02f324e13 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); int pci_reset_function(struct pci_dev *dev); int pci_execute_reset_function(struct pci_dev *dev); -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); +void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); -- cgit v1.2.3 From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:39:32 +0800 Subject: PCI: define PCI resource names in an 'enum' This patch moves all definitions of the PCI resource names to an 'enum', and also replaces some hard-coded resource variables with symbol names. This change eases introduction of device specific resources. 
Reviewed-by: Bjorn Helgaas Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c5e02f324e13..da1c22bab40e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -82,7 +82,30 @@ enum pci_mmap_state { #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 -#define DEVICE_COUNT_RESOURCE 12 +/* + * For PCI devices, the region numbers are assigned this way: + */ +enum { + /* #0-5: standard PCI resources */ + PCI_STD_RESOURCES, + PCI_STD_RESOURCE_END = 5, + + /* #6: expansion ROM resource */ + PCI_ROM_RESOURCE, + + /* resources assigned to buses behind the bridge */ +#define PCI_BRIDGE_RESOURCE_NUM 4 + + PCI_BRIDGE_RESOURCES, + PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + + PCI_BRIDGE_RESOURCE_NUM - 1, + + /* total resources associated with a PCI device */ + PCI_NUM_RESOURCES, + + /* preserve this for compatibility */ + DEVICE_COUNT_RESOURCE +}; typedef int __bitwise pci_power_t; @@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); } -/* - * For PCI devices, the region numbers are assigned this way: - * - * 0-5 standard PCI regions - * 6 expansion ROM - * 7-10 bridges: address space assigned to buses behind the bridge - */ - -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 - #ifndef PCI_BUS_NUM_RESOURCES #define PCI_BUS_NUM_RESOURCES 16 #endif -- cgit v1.2.3 From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:09:12 +0900 Subject: PCI hotplug: introduce functions for ACPI slot detection Some ACPI related PCI hotplug code can be shared among PCI hotplug drivers. 
This patch introduces the following functions in drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes acpiphp and pciehp to use them. - int acpi_pci_detect_ejectable(struct pci_bus *pbus) This checks if the specified PCI bus has ejectable slots. - int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle) This checks if the specified handle is ejectable ACPI PCI slot. The 'pbus' parameter is needed to check if 'handle' is PCI related ACPI object. This patch also introduces the following inline function in include/linux/pci-acpi.h, which is useful to get ACPI handle of the PCI bridge from struct pci_bus of the bridge's secondary bus. - static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) This returns ACPI handle of the PCI bridge which generates PCI bus specified by 'pbus'. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 9 +++++++++ include/linux/pci_hotplug.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 871e096e0fbc..042c166f65d5 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), pdev->bus->number); } + +static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) +{ + int seg = pci_domain_nr(pbus), busnr = pbus->number; + struct pci_dev *bridge = pbus->self; + if (bridge) + return DEVICE_ACPI_HANDLE(&(bridge->dev)); + return acpi_get_pci_rootbridge_handle(seg, busnr); +} #else #if !defined(AE_ERROR) typedef u32 acpi_status; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..f7cc204fab07 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, 
struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); +int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle); +int acpi_pci_detect_ejectable(struct pci_bus *pbus); #endif #endif -- cgit v1.2.3 From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Dec 2008 21:36:55 -0700 Subject: PCI: add pci_common_swizzle() for INTx swizzling This patch adds pci_common_swizzle(), which swizzles INTx values all the way up to a root bridge. This common implementation can replace several architecture-specific ones. This should someday be combined with pci_get_interrupt_pin(), but I left it separate for now to make reviewing easier. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index da1c22bab40e..170f9ae2d8a0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); +u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); -- cgit v1.2.3 From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: revise VPD access interface Change PCI VPD API which was only used by sysfs to something usable in drivers. 
* move iteration over multiple words to the low level * use conventional types for arguments * add exportable wrapper Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 170f9ae2d8a0..76079e106895 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +/* Vital product data routines */ +ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); +ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); + /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); -- cgit v1.2.3 From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: add interface to set visible size of VPD The VPD on all devices may not be 32K. Unfortunately, there is no generic way to find the size, so this adds a simple API hook to reset it. 
Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 76079e106895..7cbecef19bb6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); -- cgit v1.2.3 From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 19 Dec 2008 15:19:02 +0900 Subject: PCI: pciehp: cleanup register and field definitions Clean up register definitions related to PCI Express Hot plug. - Add register definitions into include/linux/pci_regs.h, and use them instead of pciehp's locally defined register definitions. - Remove pciehp's locally defined register definitions - Remove unused register definitions in pciehp. - Some minor cleanups. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 64 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7766488470e4..027815b4635e 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -411,20 +411,70 @@ #define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */ -#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */ -#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */ -#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Surprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */ -#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock COnfiguration */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define 
PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define 
PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ #define PCI_EXP_RTCTL 28 /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -- cgit v1.2.3 From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Dec 2008 03:08:29 +0000 Subject: PCI: Add pci_clear_master() as opposite of pci_set_master() During an online device reset it may be useful to disable bus-mastering. pci_disable_device() does that, and far more besides, so is not suitable for an online reset. Add pci_clear_master() which does just this. 
Signed-off-by: Ben Hutchings Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cbecef19bb6..0f6d2bb1df9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev) void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); -- cgit v1.2.3 From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jan 2009 14:50:27 +0100 Subject: x86/PCI: Do not use interrupt links for devices using MSI-X pcibios_enable_device() and pcibios_disable_device() don't handle IRQs for devices that have MSI enabled and it should treat the devices with MSI-X enabled in the same way. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0f6d2bb1df9c..80f8b8b65fde 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -336,6 +336,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +#ifdef CONFIG_PCI_MSI +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) +{ + return pci_dev->msi_enabled || pci_dev->msix_enabled; +} +#else +static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; } +#endif + /* * Error values that may be returned by PCI functions. 
*/ -- cgit v1.2.3 From 940fbf411e5fb42aee8ab7dd814b24080951dbfc Mon Sep 17 00:00:00 2001 From: Detlef Riekenberg Date: Wed, 7 Jan 2009 10:11:44 +0100 Subject: linux/types.h: Don't depend on __GNUC__ for __le64/__be64 The typedefs for __u64 and __s64 were fixed to be available for other compilers on May 2 2008 by H. Peter Anvin (in commit edfa5cfa3dc5) Acked-by: H. Peter Anvin Signed-off-by: Detlef Riekenberg Signed-off-by: Linus Torvalds --- include/linux/types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 3b864f2d9560..712ca53bc348 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -176,10 +176,9 @@ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; typedef __u32 __bitwise __le32; typedef __u32 __bitwise __be32; -#if defined(__GNUC__) typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; -#endif + typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; -- cgit v1.2.3 From 8d1a0a13edecfdcb47fee3238ed4a2af2a2867f9 Mon Sep 17 00:00:00 2001 From: Anders Larsen Date: Thu, 1 Jan 2009 17:17:35 +0100 Subject: qnx: include <linux/types.h> for definitions of __[us]{8,16,32,64} types On 2008-12-30 11:32:33, Sam Ravnborg wrote: > We have added a few additional validation checks of the userspace headers: ... > 3) We should include <linux/types.h> and not <asm/types.h> > 4) If we use a __[us]{8,16,32,64} type then we must include <linux/types.h> Satisfy these requirements for the linux/qnx*.h headers.
Signed-off-by: Anders Larsen Signed-off-by: Sam Ravnborg --- include/linux/qnx4_fs.h | 4 +--- include/linux/qnxtypes.h | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 34a196ee7941..787d19ea9f46 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -2,14 +2,12 @@ * Name : qnx4_fs.h * Author : Richard Frowijn * Function : qnx4 global filesystem definitions - * Version : 1.0.2 - * Last modified : 2000-01-31 - * * History : 23-03-1998 created */ #ifndef _LINUX_QNX4_FS_H #define _LINUX_QNX4_FS_H +#include #include #include diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h index a3eb1137857b..bebbe5cc4fb8 100644 --- a/include/linux/qnxtypes.h +++ b/include/linux/qnxtypes.h @@ -2,9 +2,6 @@ * Name : qnxtypes.h * Author : Richard Frowijn * Function : standard qnx types - * Version : 1.0.2 - * Last modified : 2000-01-06 - * * History : 22-03-1998 created * */ @@ -12,6 +9,8 @@ #ifndef _QNX4TYPES_H #define _QNX4TYPES_H +#include + typedef __le16 qnx4_nxtnt_t; typedef __u8 qnx4_ftype_t; -- cgit v1.2.3 From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 21:50:22 +0100 Subject: oprofile: make new cpu buffer functions part of the api This patch creates the new functions oprofile_write_reserve() oprofile_add_data() oprofile_write_commit() and makes them part of the oprofile api. 
Signed-off-by: Robert Richter --- include/linux/oprofile.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1ce9fe572e51..1d9518bc4c58 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start, unsigned long oprofile_get_cpu_buffer_size(void); void oprofile_cpu_buffer_inc_smpl_lost(void); +/* cpu buffer functions */ + +struct op_sample; + +struct op_entry { + struct ring_buffer_event *event; + struct op_sample *sample; + unsigned long irq_flags; + unsigned long size; + unsigned long *data; +}; + +void oprofile_write_reserve(struct op_entry *entry, + struct pt_regs * const regs, + unsigned long pc, int code, int size); +int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_write_commit(struct op_entry *entry); + #endif /* OPROFILE_H */ -- cgit v1.2.3 From 87df4de8073f922a1f643b9fa6ba0412d5529ecf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:03 +0200 Subject: nfsd: last_byte_offset refactor the nfs4 server lock code to use last_byte_offset to compute the last byte covered by the lock. Check for overflow so that the last byte is set to NFS4_MAX_UINT64 if offset + len wraps around. Also, use NFS4_MAX_UINT64 for ~(u64)0 where appropriate. Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields --- include/linux/nfs4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ea0366769484..b912311a56b1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -88,6 +88,8 @@ #define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 #define NFS4_ACE_MASK_ALL 0x001F01FF +#define NFS4_MAX_UINT64 (~(u64)0) + enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, NFS4_ACL_WHO_OWNER, -- cgit v1.2.3 From db43910cb42285a99f45f7e0a0a32e32d0b61dcf Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 15 Dec 2008 19:42:24 +0200 Subject: nfsd: get rid of NFSD_VERSION Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 21269405ffe2..e19f45991b2e 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -23,7 +23,6 @@ /* * nfsd version */ -#define NFSD_VERSION "0.5" #define NFSD_SUPPORTED_MINOR_VERSION 0 /* -- cgit v1.2.3 From 5886188dc7ba9a76babcd37452f44079a9a77f71 Mon Sep 17 00:00:00 2001 From: Benjamin Krill Date: Wed, 7 Jan 2009 10:32:38 +0100 Subject: serial: Add driver for the Cell Network Processor serial port NWP device Add support for the nwp serial device which is connected to a DCR bus. It uses the of_serial device driver to determine necessary properties from the device tree. The supported device is added as serial port number 85. NWP stands for network processor and it is part of the QPACE - Quantum Chromodynamics Parallel Computing on the Cell Broadband Engine project. The implementation is a lightweight uart implementation with the focus to consume as little resources as possible and it is connected to a DCR bus. 
Signed-off-by: Benjamin Krill Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/nwpserial.h | 18 ++++++++++++++++++ include/linux/serial_core.h | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 include/linux/nwpserial.h (limited to 'include/linux') diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h new file mode 100644 index 000000000000..9acb21572eaf --- /dev/null +++ b/include/linux/nwpserial.h @@ -0,0 +1,18 @@ +/* + * Serial Port driver for a NWP uart device + * + * Copyright (C) 2008 IBM Corp., Benjamin Krill + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#ifndef _NWPSERIAL_H +#define _NWPSERIAL_H + +int nwpserial_register_port(struct uart_port *port); +void nwpserial_unregister_port(int line); + +#endif /* _NWPSERIAL_H */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b4199841f1fc..90bbbf0b1161 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -161,6 +161,9 @@ #define PORT_S3C6400 84 +/* NWPSERIAL */ +#define PORT_NWPSERIAL 85 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From 8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make VMAs per MM as for MMU-mode linux Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. 
A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that address. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode.
The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- include/linux/mm.h | 18 ++++++------------ include/linux/mm_types.h | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4a3d28c86443..b91a73fd1bcc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr; extern struct kmem_cache *vm_area_cachep; -/* - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is - * disabled, then there's a single shared list of VMAs maintained by the - * system, and mm's subscribe to these individually - */ -struct vm_list_struct { - struct vm_list_struct *next; - struct vm_area_struct *vma; -}; - #ifndef CONFIG_MMU -extern struct rb_root nommu_vma_tree; -extern struct rw_semaphore nommu_vma_sem; +extern struct rb_root nommu_region_tree; +extern struct rw_semaphore nommu_region_sem; extern unsigned int kobjsize(const void *objp); #endif @@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_pages_min(void); extern void mem_init(void); +extern void __init mmap_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +/* nommu.c */ +extern atomic_t mmap_pages_allocated; + /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 
9cfc9b627fdd..1c1e0d3a1714 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -96,6 +96,22 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +/* + * A region containing a mapping of a non-memory backed file under NOMMU + * conditions. These are held in a global tree and are pinned by the VMAs that + * map parts of them. + */ +struct vm_region { + struct rb_node vm_rb; /* link in global region tree */ + unsigned long vm_flags; /* VMA vm_flags */ + unsigned long vm_start; /* start address of region */ + unsigned long vm_end; /* region initialised to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ + + atomic_t vm_usage; /* region usage count */ +}; + /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -152,7 +168,7 @@ struct vm_area_struct { unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ + struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ -- cgit v1.2.3 From dd8632a12e500a684478fea0951f380478d56fed Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make mmap allocation page trimming behaviour configurable. NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to the nearest power-of-2 number of pages. Currently it then discards the excess pages back to the page allocator, making that memory available for use by other things. This can, however, cause greater amount of fragmentation. To counter this, a sysctl is added in order to fine-tune the trimming behaviour. 
The default behaviour remains to trim pages aggressively, while this can either be disabled completely or set to a higher page-granular watermark in order to have finer-grained control. vm region vm_top bits taken from an earlier patch by David Howells. Signed-off-by: Paul Mundt Signed-off-by: David Howells Tested-by: Mike Frysinger --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c1e0d3a1714..92915e81443f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -106,6 +106,7 @@ struct vm_region { unsigned long vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ + unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ -- cgit v1.2.3 From e2387d6c20752ccdb2895ba5de664fa39652f4cc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 17 Nov 2008 14:35:44 +0000 Subject: leds: Make header variable naming consistent There is one place where the struct led_classdev as the function argument is named differently. Fix it. 
Signed-off-by: Wolfram Sang Signed-off-by: Richard Purdie --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index d3a73f5a48c3..3c1a8ce6a5ea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -62,7 +62,7 @@ struct led_classdev { extern int led_classdev_register(struct device *parent, struct led_classdev *led_cdev); -extern void led_classdev_unregister(struct led_classdev *lcd); +extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); -- cgit v1.2.3 From 934cd3f979a1daacbd403398f2c7a8f6720c33aa Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Wed, 3 Dec 2008 08:21:36 +0000 Subject: leds: leds-pcs9532 - Move i2c work to a workqueque Apparently these might be called under atomic context, and i2c operations may sleep. BUG found by Ross Burton Signed-off-by: Riku Voipio Signed-off-by: Richard Purdie --- include/linux/leds-pca9532.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index 81b4207deb95..96eea90f01a8 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -15,6 +15,7 @@ #define __LINUX_PCA9532_H #include +#include enum pca9532_state { PCA9532_OFF = 0x0, @@ -31,6 +32,7 @@ struct pca9532_led { struct i2c_client *client; char *name; struct led_classdev ldev; + struct work_struct work; enum pca9532_type type; enum pca9532_state state; }; -- cgit v1.2.3 From 0081e8020ebd814a99e45720a10e869a54ee08a6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Dec 2008 16:52:33 +0000 Subject: leds: Add WM8350 LED driver The voltage and current regulators on the WM8350 AudioPlus PMIC can be used in concert to provide a power efficient LED driver. 
This driver implements support for this within the standard LED class. Platform initialisation code should configure the LED hardware in the init callback provided by the WM8350 core driver. The callback should use wm8350_isink_set_flash(), wm8350_dcdc25_set_mode() and wm8350_dcdc_set_slot() to configure the operating parameters of the regulators for their hardware and then use wm8350_register_led() to instantiate the LED driver. This driver was originally written by Liam Girdwood, though it has been extensively modified since then. Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/mfd/wm8350/pmic.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index 96acbfc8aa12..be3264e286e0 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -13,6 +13,10 @@ #ifndef __LINUX_MFD_WM8350_PMIC_H #define __LINUX_MFD_WM8350_PMIC_H +#include +#include +#include + /* * Register values.
*/ @@ -700,6 +704,33 @@ struct wm8350; struct platform_device; struct regulator_init_data; +/* + * WM8350 LED platform data + */ +struct wm8350_led_platform_data { + const char *name; + const char *default_trigger; + int max_uA; +}; + +struct wm8350_led { + struct platform_device *pdev; + struct mutex mutex; + struct work_struct work; + spinlock_t value_lock; + enum led_brightness value; + struct led_classdev cdev; + int max_uA_index; + int enabled; + + struct regulator *isink; + struct regulator_consumer_supply isink_consumer; + struct regulator_init_data isink_init; + struct regulator *dcdc; + struct regulator_consumer_supply dcdc_consumer; + struct regulator_init_data dcdc_init; +}; + struct wm8350_pmic { /* Number of regulators of each type on this device */ int max_dcdc; @@ -717,10 +748,15 @@ struct wm8350_pmic { /* regulator devices */ struct platform_device *pdev[NUM_WM8350_REGULATORS]; + + /* LED devices */ + struct wm8350_led led[2]; }; int wm8350_register_regulator(struct wm8350 *wm8350, int reg, struct regulator_init_data *initdata); +int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink, + struct wm8350_led_platform_data *pdata); /* * Additional DCDC control not supported via regulator API -- cgit v1.2.3 From c835ee7f4154992e6cf0674d7ee136f5d36247a4 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 6 Jan 2009 21:00:19 +0000 Subject: backlight: Add suspend/resume support to the backlight core Add suspend/resume support to the backlight core and enable use of it by appropriate drivers. 
Signed-off-by: Richard Purdie --- include/linux/backlight.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1ee9488ca2e4..79ca2da81c87 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -31,6 +31,10 @@ struct backlight_device; struct fb_info; struct backlight_ops { + unsigned int options; + +#define BL_CORE_SUSPENDRESUME (1 << 0) + /* Notify the backlight driver some property has changed */ int (*update_status)(struct backlight_device *); /* Return the current backlight brightness (accounting for power, @@ -51,7 +55,19 @@ struct backlight_properties { modes; 4: full off), see FB_BLANK_XXX */ int power; /* FB Blanking active? (values as for power) */ + /* Due to be removed, please use (state & BL_CORE_FBBLANK) */ int fb_blank; + /* Flags used to signal drivers of state changes */ + /* Upper 4 bits are reserved for driver internal use */ + unsigned int state; + +#define BL_CORE_SUSPENDED (1 << 0) /* backlight is suspended */ +#define BL_CORE_FBBLANK (1 << 1) /* backlight is under an fb blank event */ +#define BL_CORE_DRIVER4 (1 << 28) /* reserved for driver specific use */ +#define BL_CORE_DRIVER3 (1 << 29) /* reserved for driver specific use */ +#define BL_CORE_DRIVER2 (1 << 30) /* reserved for driver specific use */ +#define BL_CORE_DRIVER1 (1 << 31) /* reserved for driver specific use */ + }; struct backlight_device { -- cgit v1.2.3 From 1107ba885e46964316c083d441d5dd185b6c9e49 Mon Sep 17 00:00:00 2001 From: Alex Zeffertt Date: Wed, 7 Jan 2009 18:07:11 -0800 Subject: xen: add xenfs to allow usermode <-> Xen interaction The xenfs filesystem exports various interfaces to usermode. Initially this exports a file to allow usermode to interact with xenbus/xenstore. Traditionally this appeared in /proc/xen. 
Rather than extending procfs, this patch adds a backward-compat mountpoint on /proc/xen, and provides a xenfs filesystem which can be mounted there. Signed-off-by: Alex Zeffertt Signed-off-by: Jeremy Fitzhardinge Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index f7f3fdddbef0..439f6f3cb0c4 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -13,6 +13,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 +#define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 -- cgit v1.2.3 From 18a82eb9f980b5e02cea651e4ecda26265d98933 Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Wed, 7 Jan 2009 18:07:19 -0800 Subject: ext2: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext2_sb_info is 17024 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. To fix that, allocate ->s_blockgroup_lock, which fits nicely in an order 2 page in the worst case, separately. This shrinks down struct ext2_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory.
Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index dc541f3653d1..1cdb66367c98 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -101,7 +101,7 @@ struct ext2_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; @@ -111,7 +111,7 @@ struct ext2_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT2_FS_SB */ -- cgit v1.2.3 From 0e090f1e05a563cc9acdda442767176bf1616001 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:20 -0800 Subject: ext2: don't inherit inappropriate inode flags from parent At present BTREE/INDEX is the only flag that new ext2 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, INDEX, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866. 
Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 78c775a83f7c..c3a051819363 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -194,6 +194,13 @@ struct ext2_group_desc #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ + EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\ + EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\ + EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ + EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) + /* * ioctl commands */ -- cgit v1.2.3 From ef8b646183868b2d042fa6cde0eef2a31263ff85 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:21 -0800 Subject: ext2: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. 
Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c3a051819363..121720d74e15 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -201,6 +201,23 @@ struct ext2_group_desc EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT2_REG_FLMASK; + else + return flags & EXT2_OTHER_FLMASK; +} + /* * ioctl commands */ -- cgit v1.2.3 From f420d4dc4272fd223986762df2ad06056ddebada Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Jan 2009 18:07:24 -0800 Subject: jbd: improve fsync batching There is a flaw with the way jbd handles fsync batching. If we fsync() a file and we were not the last person to run fsync() on this fs then we automatically sleep for 1 jiffie in order to wait for new writers to join into the transaction before forcing the commit. The problem with this is that with really fast storage (ie a Clariion) the time it takes to commit a transaction to disk is way faster than 1 jiffie in most cases, so sleeping means waiting longer with nothing to do than if we just committed the transaction and kept going. Ric Wheeler noticed this when using fs_mark with more than 1 thread, the throughput would plummet as he added more threads. 
This patch attempts to fix this problem by recording the average time in nanoseconds that it takes to commit a transaction to disk, and what time we started the transaction. If we run an fsync() and we have been running for less time than it takes to commit the transaction to disk, we sleep for the delta amount of time and then commit to disk. We achieve sub-jiffie sleeping using schedule_hrtimeout. This means that the wait time is auto-tuned to the speed of the underlying disk, instead of having this static timeout. I weighted the average according to somebody's comments (Andreas Dilger I think) in order to help normalize random outliers where we take way longer or way less time to commit than the average. I also have a min() check in there to make sure we don't sleep longer than a jiffie in case our storage is super slow, this was requested by Andrew. I unfortunately do not have access to a Clariion, so I had to use a ramdisk to represent a super fast array. I tested with a SATA drive with barrier=1 to make sure there was no regression with local disks, I tested with a 4 way multipathed Apple Xserve RAID array and of course the ramdisk. I ran the following command fs_mark -d /mnt/ext3-test -s 4096 -n 2000 -D 64 -t $i where $i was 2, 4, 8, 16 and 32. I mkfs'ed the fs each time.
Here are my results type threads with patch without patch sata 2 24.6 26.3 sata 4 49.2 48.1 sata 8 70.1 67.0 sata 16 104.0 94.1 sata 32 153.6 142.7 xserve 2 246.4 222.0 xserve 4 480.0 440.8 xserve 8 829.5 730.8 xserve 16 1172.7 1026.9 xserve 32 1816.3 1650.5 ramdisk 2 2538.3 1745.6 ramdisk 4 2942.3 661.9 ramdisk 8 2882.5 999.8 ramdisk 16 2738.7 1801.9 ramdisk 32 2541.9 2394.0 Signed-off-by: Josef Bacik Cc: Andreas Dilger Cc: Arjan van de Ven Cc: Ric Wheeler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 346e2b80be7d..6384b19efe64 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -542,6 +542,11 @@ struct transaction_s */ unsigned long t_expires; + /* + * When this transaction started, in nanoseconds [no locking] + */ + ktime_t t_start_time; + /* * How many handles used this transaction? [t_handle_lock] */ @@ -798,8 +803,18 @@ struct journal_s struct buffer_head **j_wbuf; int j_wbufsize; + /* + * this is the pid of the last person to run a synchronous operation + * through the journal. + */ pid_t j_last_sync_writer; + /* + * the average amount of time in nanoseconds it takes to commit a + * transaction to the disk. [j_state_lock] + */ + u64 j_average_commit_time; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here -- cgit v1.2.3 From 5df096d67ec2b6578518caed7d57317a4b807aa1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 7 Jan 2009 18:07:25 -0800 Subject: ext3: allocate ->s_blockgroup_lock separately As spotted by kmemtrace, struct ext3_sb_info is 17152 bytes on 64-bit which makes it a very bad fit for SLAB allocators. The culprit of the wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when NR_CPUS >= 32. 
To fix that, allocate ->s_blockgroup_lock, which fits nicely in an order-2 page in the worst case, separately. This shrinks down struct ext3_sb_info enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of 32 KB saving 15 KB of memory. Acked-by: Andreas Dilger Signed-off-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs_sb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..76fdc0f4b028 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -60,7 +60,7 @@ struct ext3_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; @@ -86,7 +86,7 @@ struct ext3_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _LINUX_EXT3_FS_SB */ -- cgit v1.2.3 From 2e8671cb566da993425d324fc355af31edc6e7f1 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: don't inherit inappropriate inode flags from parent At present INDEX is the only flag that new ext3 inodes do NOT inherit from their parent. In addition prevent the flags DIRTY, ECOMPR, IMAGIC and TOPDIR from being inherited. List inheritable flags explicitly to prevent future flags from accidentally being inherited. This fixes the TOPDIR flag inheritance bug reported at http://bugzilla.kernel.org/show_bug.cgi?id=9866.
Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..b745619a9b8e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -178,6 +178,13 @@ struct ext3_group_desc #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ + EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\ + EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\ + EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ + EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) + /* * Inode dynamic state flags */ -- cgit v1.2.3 From 04143e2fb9d512c21e1dcfb561dbb0445dcfdc8c Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 7 Jan 2009 18:07:26 -0800 Subject: ext3: tighten restrictions on inode flags At the moment there are few restrictions on which flags may be set on which inodes. Specifically DIRSYNC may only be set on directories and IMMUTABLE and APPEND may not be set on links. Tighten that to disallow TOPDIR being set on non-directories and only NODUMP and NOATIME to be set on non-regular file, non-directories. Introduces a flags masking function which masks flags based on mode and use it during inode creation and when flags are set via the ioctl to facilitate future consistency. 
Signed-off-by: Duane Griffin Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index b745619a9b8e..d76800f6ecf0 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -185,6 +185,23 @@ struct ext3_group_desc EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT3_REG_FLMASK; + else + return flags & EXT3_OTHER_FLMASK; +} + /* * Inode dynamic state flags */ -- cgit v1.2.3 From b2aa30f7bb381e04c93eed106089ba55553955f1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:07:37 -0800 Subject: cgroups: don't put struct cgroupfs_root protected by RCU We don't access struct cgroupfs_root in fast path, so we should not put struct cgroupfs_root protected by RCU But the comment in struct cgroup_subsys.root confuse us. 
struct cgroup_subsys.root is used in these places: 1 find_css_set(): if (ss->root->subsys_list.next == &ss->sibling) 2 rebind_subsystems(): if (ss->root != &rootnode) rcu_assign_pointer(ss->root, root); rcu_assign_pointer(subsys[i]->root, &rootnode); 3 cgroup_has_css_refs(): if (ss->root != cgrp->root) 4 cgroup_init_subsys(): ss->root = &rootnode; 5 proc_cgroupstats_show(): ss->name, ss->root->subsys_bits, ss->root->number_of_cgroups, !ss->disabled); 6 cgroup_clone(): root = subsys->root; if ((root != subsys->root) || In all these places we either hold cgroup_lock() or do not dereference struct cgroupfs_root. This means we don't need RCU when using struct cgroup_subsys.root, and we should not mark struct cgroupfs_root as protected by RCU. Signed-off-by: Lai Jiangshan Reviewed-by: Paul Menage Cc: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 08b78c09b09a..f68dfd8dd53a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -337,7 +337,6 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - /* Protected by RCU */ struct cgroupfs_root *root; struct list_head sibling; -- cgit v1.2.3 From a47295e6bc42ad35f9c15ac66f598aa24debd4e2 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:07:44 -0800 Subject: cgroups: make cgroup_path() RCU-safe Fix races between /proc/sched_debug by freeing cgroup objects via an RCU callback. Thus any cgroup reference obtained from an RCU-safe source will remain valid during the RCU section. Since dentries are also RCU-safe, this allows us to traverse up the tree safely. Additionally, make cgroup_path() check for a NULL cgrp->dentry to avoid trying to report a path for a partially-created cgroup.
[lizf@cn.fujitsu.com: call deactive_super() in cgroup_diput()] Signed-off-by: Paul Menage Reviewed-by: Li Zefan Tested-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f68dfd8dd53a..73d1c730c3c4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -116,7 +116,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry */ + struct dentry *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; @@ -145,6 +145,9 @@ struct cgroup { int pids_use_count; /* Length of the current tasks_pids array */ int pids_length; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* A css_set is a structure holding pointers to a set of -- cgit v1.2.3 From 7a81b88cb53e335ff7d019e6398c95792c817d93 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:48 -0800 Subject: memcg: introduce charge-commit-cancel style of functions There is a small race in do_swap_page(). When the page swapped-in is charged, the mapcount can be greater than 0. But, at the same time some process (shares it ) call unmap and make mapcount 1->0 and the page is uncharged. CPUA CPUB mapcount == 1. (1) charge if mapcount==0 zap_pte_range() (2) mapcount 1 => 0. (3) uncharge(). (success) (4) set page's rmap() mapcount 0=>1 Then, this swap page's account is leaked. For fixing this, I added a new interface. - charge account to res_counter by PAGE_SIZE and try to free pages if necessary. - commit register page_cgroup and add to LRU if necessary. - cancel uncharge PAGE_SIZE because of do_swap_page failure. 
CPUA (1) charge (always) (2) set page's rmap (mapcount > 0) (3) commit charge was necessary or not after set_pte(). This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting. Usual mem_cgroup_charge_common() does charge -> commit at a time. And this patch also adds following function to clarify all charges. - mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge() called against newly allocated anon pages. - mem_cgroup_charge_migrate_fixup() called only from remove_migration_ptes(). we'll have to rewrite this later.(this patch just keeps old behavior) This function will be removed by additional patch to make migration clearer. Good for clarifying "what we do" Then, we have 4 following charge points. - newpage - swap-in - add-to-cache. - migration. [akpm@linux-foundation.org: add missing inline directives to stubs] Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1fbe14d39521..c592f315cd02 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,8 +27,17 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); +/* for swap handling */ +extern int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr); +extern void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr); +extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); + extern int mem_cgroup_cache_charge(struct page 
*page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); @@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline int mem_cgroup_charge(struct page *page, +struct mem_cgroup; + +static inline int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; @@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } +static inline int mem_cgroup_charge_migrate_fixup(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + +static inline int mem_cgroup_try_charge(struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline void mem_cgroup_commit_charge_swapin(struct page *page, + struct mem_cgroup *ptr) +{ +} + +static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) +{ +} + static inline void mem_cgroup_uncharge_page(struct page *page) { } -- cgit v1.2.3 From 01b1ae63c2270cbacfd43fea94578c17950eb548 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:50 -0800 Subject: memcg: simple migration handling Now, management of "charge" under page migration is done under following manner. (Assume migrate page contents from oldpage to newpage) before - "newpage" is charged before migration. at success. - "oldpage" is uncharged at somewhere(unmap, radix-tree-replace) at failure - "newpage" is uncharged. - "oldpage" is charged if necessary (*1) But (*1) is not reliable....because of GFP_ATOMIC. This patch tries to change behavior as following by charge/commit/cancel ops. before - charge PAGE_SIZE (no target page) success - commit charge against "newpage". failure - commit charge against "oldpage". (PCG_USED bit works effectively to avoid double-counting) - if "oldpage" is obsolete, cancel charge of PAGE_SIZE. 
Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c592f315cd02..b095f5f6ecf7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,8 +29,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); @@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage); -extern void mem_cgroup_end_migration(struct page *page); +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); +extern void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, struct page *newpage); /* * For memory reclaim. 
@@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_charge_migrate_fixup(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr) { @@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task, } static inline int -mem_cgroup_prepare_migration(struct page *page, struct page *newpage) +mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) { return 0; } -static inline void mem_cgroup_end_migration(struct page *page) +static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, + struct page *oldpage, + struct page *newpage) { } -- cgit v1.2.3 From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:56 -0800 Subject: memcg: handle swap caches SwapCache support for memory resource controller (memcg) Before mem+swap controller, memcg itself should handle SwapCache in proper way. This is cut-out from it. In current memcg, SwapCache is just leaked and the user can create tons of SwapCache. This is a leak of account and should be handled. SwapCache accounting is done as following. charge (anon) - charged when it's mapped. (because of readahead, charge at add_to_swap_cache() is not sane) uncharge (anon) - uncharged when it's dropped from swapcache and fully unmapped. means it's not uncharged at unmap. Note: delete from swap cache at swap-in is done after rmap information is established. charge (shmem) - charged at swap-in. this prevents charge at add_to_page_cache(). uncharge (shmem) - uncharged when it's dropped from swapcache and not on shmem's radix-tree. at migration, check against 'old page' is modified to handle shmem. Comparing to the old version discussed (and caused troubles), we have advantages of - PCG_USED bit. - simple migrating handling. 
So, situation is much easier than several months ago, maybe. [hugh@veritas.com: memcg: handle swap caches build fix] Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 91dee50fe260..f8f3907533f0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -333,6 +333,22 @@ static inline void disable_swap_token(void) put_swap_token(swap_token_mm); } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked); +extern void mem_cgroup_uncharge_swapcache(struct page *page); +#else +static inline +int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} +static inline void mem_cgroup_uncharge_swapcache(struct page *page) +{ +} +#endif + #else /* CONFIG_SWAP */ #define nr_swap_pages 0L @@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void) #define has_swap_token(x) 0 #define disable_swap_token() do { } while(0) +static inline int mem_cgroup_cache_charge_swapin(struct page *page, + struct mm_struct *mm, gfp_t mask, bool locked) +{ + return 0; +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.3 From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:57 -0800 Subject: memcg: mem+swap controller Kconfig Config and control variable for mem+swap controller. This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP (memory resource controller swap extension.) 
For accounting swap, it's obvious that we have to use additional memory to remember "who uses swap". This adds more overhead. So, it's better to offer "choice" to users. This patch adds 2 choices. This patch adds 2 parameters to enable swap extension or not. - CONFIG - boot option Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b095f5f6ecf7..41b46cc9d1f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern int do_swap_account; +#endif #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; -- cgit v1.2.3 From 27a7faa0779dd13729196c1a818c294f44bbd1ee Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:58 -0800 Subject: memcg: swap cgroup for remembering usage For accounting swap, we need a record per swap entry, at least. This patch adds following function. - swap_cgroup_swapon() .... called from swapon - swap_cgroup_swapoff() ... called at the end of swapoff. - swap_cgroup_record() .... record information of swap entry. - swap_cgroup_lookup() .... lookup information of swap entry. This patch just implements "how to record information". No actual method for limit the usage of swap. These routine uses flat table to record and lookup. "wise" lookup system like radix-tree requires requires memory allocation at new records but swap-out is usually called under memory shortage (or memcg hits limit.) So, I used static allocation. 
(maybe dynamic allocation is not very hard but it adds additional memory allocation in memory shortage path.) Note1: In this, we use pointer to record information and this means 8bytes per swap entry. I think we can reduce this when we create "id of cgroup" in the range of 0-65535 or 0-255. Reported-by: Daisuke Nishimura Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Reported-by: Hugh Dickins Reported-by: Balbir Singh Reported-by: Andrew Morton Signed-off-by: KAMEZAWA Hiroyuki Cc: Pavel Emelianov Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1e6d34bfa094..d754b2dfbf2d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -104,5 +104,40 @@ static inline void page_cgroup_init(void) { } +#endif + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#include +extern struct mem_cgroup * +swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); +extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); +extern int swap_cgroup_swapon(int type, unsigned long max_pages); +extern void swap_cgroup_swapoff(int type); +#else +#include + +static inline +struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) +{ + return NULL; +} + +static inline +struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) +{ + return NULL; +} + +static inline int +swap_cgroup_swapon(int type, unsigned long max_pages) +{ + return 0; +} + +static inline void swap_cgroup_swapoff(int type) +{ + return; +} + #endif #endif -- cgit v1.2.3 From 8c7c6e34a1256a5082d38c8e9bd1474476912715 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:00 -0800 Subject: memcg: mem+swap controller core This patch implements per cgroup limit for usage of memory+swap. 
Although there is SwapCache, double counting of swap-cache and swap-entry is avoided. Mem+Swap controller works as follows. - memory usage is limited by memory.limit_in_bytes. - memory + swap usage is limited by memory.memsw_limit_in_bytes. This has the following benefits. - A user can limit total resource usage of mem+swap. Without this, because memory resource controller doesn't take care of usage of swap, a process can exhaust all the swap (by memory leak.) We can avoid this case. And Swap is a shared resource but it cannot be reclaimed (goes back to memory) until it's used. This characteristic can be trouble when the memory is divided into some parts by cpuset or memcg. Assume group A and group B. After some application executes, the system can be.. Group A -- very large free memory space but occupy 99% of swap. Group B -- under memory shortage but cannot use swap...it's nearly full. Ability to set appropriate swap limit for each group is required. Someone may wonder "why not swap but mem+swap ?" - The global LRU(kswapd) can swap out arbitrary pages. Swap-out means to move account from memory to swap...there is no change in usage of mem+swap. In other words, when we want to limit the usage of swap without affecting global LRU, mem+swap limit is better than just limiting swap. Accounting target information is stored in swap_cgroup which is per swap entry record. Charging is done as follows. map - charge page and memsw. unmap - uncharge page/memsw if not SwapCache. swap-out (__delete_from_swap_cache) - uncharge page - record mem_cgroup information to swap_cgroup. swap-in (do_swap_page) - charged as page and memsw. record in swap_cgroup is cleared. memsw accounting is decremented. swap-free (swap_free()) - if swap entry is freed, memsw is uncharged by PAGE_SIZE. There are people who work in never-swap environments and consider swap as something bad. For such people, this mem+swap controller extension is just an overhead.
This overhead is avoided by config or boot option. (see Kconfig. detail is not in this patch.) TODO: - maybe more optimization can be done in swap-in path. (but not very safe.) But we just do simple accounting at this stage. [nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex] [hugh@veritas.com: memswap controller core swapcache fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Daisuke Nishimura Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- include/linux/swap.h | 14 +++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 41b46cc9d1f1..ca51ac72d6c0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,8 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, /* for swap handling */ extern int mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **ptr); +extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); @@ -80,7 +82,6 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -97,7 +98,13 @@ static inline int mem_cgroup_cache_charge(struct page *page, } static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) + gfp_t gfp_mask, struct mem_cgroup **ptr) +{ + return 0; +} + +static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, + struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
{ return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index f8f3907533f0..be938ce4895a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,7 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask); + gfp_t gfp_mask, bool noswap); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -336,7 +336,7 @@ static inline void disable_swap_token(void) #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern int mem_cgroup_cache_charge_swapin(struct page *page, struct mm_struct *mm, gfp_t mask, bool locked); -extern void mem_cgroup_uncharge_swapcache(struct page *page); +extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else static inline int mem_cgroup_cache_charge_swapin(struct page *page, @@ -344,7 +344,15 @@ int mem_cgroup_cache_charge_swapin(struct page *page, { return 0; } -static inline void mem_cgroup_uncharge_swapcache(struct page *page) +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} +#endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +#else +static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) { } #endif -- cgit v1.2.3 From 08e552c69c6930d64722de3ec18c51844d06ee28 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:01 -0800 Subject: memcg: synchronized LRU A big patch for changing memcg's LRU semantics. Now, - page_cgroup is linked to mem_cgroup's its own LRU (per zone). - LRU of page_cgroup is not synchronous with global LRU. - page and page_cgroup is one-to-one and statically allocated. 
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc); - SwapCache is handled. And, when we handle LRU list of page_cgroup, we do the following. pc = lookup_page_cgroup(page); lock_page_cgroup(pc); .....................(1) mz = page_cgroup_zoneinfo(pc); spin_lock(&mz->lru_lock); .....add to LRU spin_unlock(&mz->lru_lock); unlock_page_cgroup(pc); But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock. So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct. This is a trial to remove this dirty nesting of locks. This patch changes mz->lru_lock to be zone->lru_lock. Then, above sequence will be written as spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU mem_cgroup_add/remove/etc_lru() { pc = lookup_page_cgroup(page); mz = page_cgroup_zoneinfo(pc); if (PageCgroupUsed(pc)) { ....add to LRU } spin_unlock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU This is much simpler. (*) We're safe even if we don't take lock_page_cgroup(pc). Because.. 1. When pc->mem_cgroup can be modified. - at charge. - at account_move(). 2. at charge the PCG_USED bit is not set before pc->mem_cgroup is fixed. 3. at account_move() the page is isolated and not on LRU. Pros. - easy for maintenance. - memcg can make use of laziness of pagevec. - we don't have to duplicate LRU/Active/Unevictable bit in page_cgroup. - LRU status of memcg will be synchronized with global LRU's one. - # of locks is reduced. - account_move() is simplified very much. Cons. - may increase cost of LRU rotation. (no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 29 +++++++++++++++++++++++++++-- include/linux/mm_inline.h | 3 +++ include/linux/page_cgroup.h | 17 ----------------- 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ca51ac72d6c0..32c07b1852d6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); +extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); +extern void mem_cgroup_del_lru(struct page *page); +extern void mem_cgroup_move_lists(struct page *page, + enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); @@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } -static inline void mem_cgroup_move_lists(struct page *page, bool active) +static inline void mem_cgroup_add_lru_list(struct page *page, int lru) +{ +} + +static inline void mem_cgroup_del_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru) +{ + return ; +} + +static inline void mem_cgroup_del_lru(struct page *page) +{ + return ; +} + +static inline void +mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list 
to) { } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c948350c378e..37ef13d0f01e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_add(&page->lru, &zone->lru[l].list); __inc_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_add_lru_list(page, l); } static inline void @@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_del(&page->lru); __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } static inline void @@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page) l += page_is_file_cache(page); } __dec_zone_state(zone, NR_LRU_BASE + l); + mem_cgroup_del_lru_list(page, l); } /** diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index d754b2dfbf2d..602cc1fdee90 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -26,10 +26,6 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. 
*/ - /* flags for LRU placement */ - PCG_ACTIVE, /* page is active in this cgroup */ - PCG_FILE, /* page is file system backed */ - PCG_UNEVICTABLE, /* page is unevictableable */ }; #define TESTPCGFLAG(uname, lname) \ @@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) -/* LRU management flags (from global-lru definition) */ -TESTPCGFLAG(File, FILE) -SETPCGFLAG(File, FILE) -CLEARPCGFLAG(File, FILE) - -TESTPCGFLAG(Active, ACTIVE) -SETPCGFLAG(Active, ACTIVE) -CLEARPCGFLAG(Active, ACTIVE) - -TESTPCGFLAG(Unevictable, UNEVICTABLE) -SETPCGFLAG(Unevictable, UNEVICTABLE) -CLEARPCGFLAG(Unevictable, UNEVICTABLE) - static inline int page_cgroup_nid(struct page_cgroup *pc) { return page_to_nid(pc->page); -- cgit v1.2.3 From f8d665422603ee1b8ed04dcad4242f14d623c941 Mon Sep 17 00:00:00 2001 From: Hirokazu Takahashi Date: Wed, 7 Jan 2009 18:08:02 -0800 Subject: memcg: add mem_cgroup_disabled() We check mem_cgroup is disabled or not by checking mem_cgroup_subsys.disabled. I think it has more references than expected, now. replacing if (mem_cgroup_subsys.disabled) with if (mem_cgroup_disabled()) give us good look, I think. 
[kamezawa.hiroyu@jp.fujitsu.com: fix typo] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 32c07b1852d6..472efd09118c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,7 +19,7 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H - +#include <linux/cgroup.h> struct mem_cgroup; struct page_cgroup; struct page; @@ -87,6 +87,14 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif + +static inline bool mem_cgroup_disabled(void) +{ + if (mem_cgroup_subsys.disabled) + return true; + return false; +} + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -214,6 +222,11 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, { return 0; } + +static inline bool mem_cgroup_disabled(void) +{ + return true; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 28dbc4b6a01fb579a9441c7b81e3d3413dc452df Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 7 Jan 2009 18:08:05 -0800 Subject: memcg: memory cgroup resource counters for hierarchy Add support for building hierarchies in resource counters. Cgroups allows us to build a deep hierarchy, but we currently don't link the resource counters belonging to the memory controller control groups, in the same fashion as the corresponding cgroup entries in the cgroup hierarchy. This patch provides the infrastructure for resource counters that have the same hierarchy as their cgroup counterparts. This set of patches is based on the resource counter hierarchy patches posted by Pavel Emelianov. 
NOTE: Building hierarchies is expensive; deeper hierarchies imply charging all the way up to the root. It is known that hierarchies are expensive, so the user needs to be careful and aware of the trade-offs before creating very deep ones. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Balbir Singh Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: David Rientjes Cc: Pavel Emelianov Cc: Dhaval Giani Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 271c1c2c9f6f..dede0a2cfc45 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -43,6 +43,10 @@ struct res_counter { * the routines below consider this to be IRQ-safe */ spinlock_t lock; + /* + * Parent counter, used for hierarchial resource accounting + */ + struct res_counter *parent; }; /** @@ -87,7 +91,7 @@ enum { * helpers for accounting */ -void res_counter_init(struct res_counter *counter); +void res_counter_init(struct res_counter *counter, struct res_counter *parent); /* * charge - try to consume more resource. @@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released -- cgit v1.2.3 From 2e4d40915fb85207fe48cfc31201824ec6d7426e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Jan 2009 18:08:07 -0800 Subject: memcontrol: rcu_read_lock() to protect mm_match_cgroup() mm_match_cgroup() calls cgroup_subsys_state(). We must use rcu_read_lock() to protect cgroup_subsys_state(). 
Signed-off-by: Lai Jiangshan Cc: Paul Menage Reviewed-by: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 472efd09118c..2de6504e01fb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -61,8 +61,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -#define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == mem_cgroup_from_task((mm)->owner)) +static inline +int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) +{ + struct mem_cgroup *mem; + rcu_read_lock(); + mem = mem_cgroup_from_task((mm)->owner); + rcu_read_unlock(); + return cgroup == mem; +} extern int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); -- cgit v1.2.3 From a636b327f731143ccc544b966cfd8de6cb6d72c6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:08 -0800 Subject: memcg: avoid unnecessary system-wide-oom-killer Current mmotm has a new oom function, pagefault_out_of_memory(). It was added to select a bad process rather than killing current. When a memcg hits its limit and calls OOM at page_fault, this handler is called and system-wide-oom handling happens. (This means the kernel panics if panic_on_oom is true.) To avoid overkill, check memcg's recent behavior before starting system-wide-oom. This patch also guarantees "don't account against a process with TIF_MEMDIE". This is necessary for smooth OOM. 
[akpm@linux-foundation.org: build fix] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Badari Pulavarty Cc: Jan Blunck Cc: Hirokazu Takahashi Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2de6504e01fb..2fdd1380bf0a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,8 @@ static inline bool mem_cgroup_disabled(void) return false; } +extern bool mem_cgroup_oom_called(struct task_struct *task); + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -234,6 +236,11 @@ static inline bool mem_cgroup_disabled(void) { return true; } + +static inline bool mem_cgroup_oom_called(struct task_struct *task) +{ + return false; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 2c26fdd70c3094fa3e84caf9ef434911933d5477 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:10 -0800 Subject: memcg: revert gfp mask fix My patch, memcg-fix-gfp_mask-of-callers-of-charge.patch changed gfp_mask of callers of charge to be GFP_HIGHUSER_MOVABLE for showing what will happen at memory reclaim. But in recent discussion, it's NACKed because it sounds ugly. This patch is for reverting it and add some clean up to gfp_mask of callers of charge. No behavior change but need review before generating HUNK in deep queue. This patch also adds explanation to meaning of gfp_mask passed to charge functions in memcontrol.h. 
Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2fdd1380bf0a..59ac95a64508 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -26,6 +26,16 @@ struct page; struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR +/* + * All "charge" functions with gfp_mask should use GFP_KERNEL or + * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't + * alloc memory but reclaims memory from all available zones. So, "where I want + * memory from" bits of gfp_mask has no meaning. So any bits of that field is + * available but adding a rule is better. charge functions' gfp_mask should + * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous + * codes. + * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) + */ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -- cgit v1.2.3 From f89eb90e33fd4e4e0cc1a6d20afd63c5a561885a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:14 -0800 Subject: inactive_anon_is_low: move to vmscan The inactive_anon_is_low() is called only vmscan. Then it can move to vmscan.c This patch doesn't have any functional change. 
Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 37ef13d0f01e..7fbb97267556 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -81,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page) return lru; } -/** - * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * - * Returns true if the zone does not have enough inactive anon pages, - * meaning some active anon pages need to be deactivated. - */ -static inline int inactive_anon_is_low(struct zone *zone) -{ - unsigned long active, inactive; - - active = zone_page_state(zone, NR_ACTIVE_ANON); - inactive = zone_page_state(zone, NR_INACTIVE_ANON); - - if (inactive * zone->inactive_ratio < active) - return 1; - - return 0; -} #endif -- cgit v1.2.3 From 6e9015716ae9b59e9635d692fddfcfb9582c146c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:15 -0800 Subject: mm: introduce zone_reclaim struct Add zone_reclaim_stat struct for later enhancement. A later patch uses this. This patch doesn't make any behavior change (yet). 
Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 35a7b5e19465..09c14e213b63 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -263,6 +263,19 @@ enum zone_type { #error ZONES_SHIFT -- too many zones configured adjust calculation #endif +struct zone_reclaim_stat { + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; +}; + struct zone { /* Fields commonly accessed by the page allocator */ unsigned long pages_min, pages_low, pages_high; @@ -315,16 +328,7 @@ struct zone { unsigned long nr_scan; } lru[NR_LRU_LISTS]; - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are refeferenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; + struct zone_reclaim_stat reclaim_stat; unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ -- cgit v1.2.3 From 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:18 -0800 Subject: memcg: add inactive_anon_is_low() The inactive_anon_is_low() is key component of active/inactive anon balancing on reclaim. 
However current inactive_anon_is_low() function only consider global reclaim. Therefore, we need following ugly scan_global_lru() condition. if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; it cause that memcg reclaim always deactivate pages when shrink_list() is called. To make mem_cgroup_inactive_anon_is_low() improve active/inactive anon balancing of memcgroup. Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Cyrill Gorcunov Cc: "Pekka Enberg" Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 59ac95a64508..aad9377c9828 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) { return false; } + +static inline int +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From a3d8e0549d913e30968fa02e505dfe02c0a23e0d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:19 -0800 Subject: memcg: add mem_cgroup_zone_nr_pages() Introduce mem_cgroup_zone_nr_pages(). It is called by zone_nr_pages() helper function. This patch doesn't have any behavior change. 
Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Acked-by: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aad9377c9828..b1defd6a2783 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); +unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, + struct zone *zone, + enum lru_list lru); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -260,6 +263,14 @@ mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) return 1; } +static inline unsigned long +mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, + enum lru_list lru) +{ + return 0; +} + + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 3e2f41f1f64744f7942980d93cc93dd3e5924560 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:20 -0800 Subject: memcg: add zone_reclaim_stat Introduce mem_cgroup_per_zone::reclaim_stat member and its statics collecting function. Now, get_scan_ratio() can calculate correct value on memcg reclaim. 
[hugh@veritas.com: avoid reclaim_stat oops when disabled] Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b1defd6a2783..36b8ebb39b82 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -105,6 +105,10 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); +struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, + struct zone *zone); +struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -271,6 +275,18 @@ mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, } +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) +{ + return NULL; +} + +static inline struct zone_reclaim_stat* +mem_cgroup_get_reclaim_stat_from_page(struct page *page) +{ + return NULL; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 9439c1c95b5c25b8031b2a7eb7e1590eb84be7f5 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:21 -0800 Subject: memcg: remove mem_cgroup_cal_reclaim() Now, get_scan_ratio() returns the correct value even under memcg reclaim, so mem_cgroup_calc_reclaim() can be removed. As a result, memcg reclaim now gets the same anon/file reclaim balancing capability as global reclaim. 
Acked-by: KAMEZAWA Hiroyuki Acked-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 36b8ebb39b82..8752052da8df 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,9 +97,6 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); - -extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, - int priority, enum lru_list lru); int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, @@ -244,13 +241,6 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, { } -static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, - struct zone *zone, int priority, - enum lru_list lru) -{ - return 0; -} - static inline bool mem_cgroup_disabled(void) { return true; -- cgit v1.2.3 From a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:24 -0800 Subject: memcg: swappiness Currently, /proc/sys/vm/swappiness can change swappiness ratio for global reclaim. However, memcg reclaim doesn't have tuning parameter for itself. In general, the optimal swappiness depend on workload. (e.g. hpc workload need to low swappiness than the others.) Then, per cgroup swappiness improve administrator tunability. 
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index be938ce4895a..4ccca25d0f05 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap); + gfp_t gfp_mask, bool noswap, + unsigned int swappiness); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -- cgit v1.2.3 From c772be939e078afd2505ede7d596a30f8f61de95 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 7 Jan 2009 18:08:25 -0800 Subject: memcg: fix calculation of active_ratio Currently, inactive_ratio of memcg is calculated at setting limit. because page_alloc.c does so and current implementation is straightforward porting. However, memcg introduced hierarchy feature recently. In hierarchy restriction, memory limit is not only decided memory.limit_in_bytes of current cgroup, but also parent limit and sibling memory usage. Then, The optimal inactive_ratio is changed frequently. So, everytime calculation is better. 
Tested-by: KAMEZAWA Hiroyuki Acked-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8752052da8df..056cf82c0e86 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,8 +97,7 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority); extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, - struct zone *zone); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); @@ -252,7 +251,7 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) { return 1; } -- cgit v1.2.3 From a5e924f5f8abf97944e625d74967cc9452cfbce8 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 7 Jan 2009 18:08:28 -0800 Subject: memcg: remove mem_cgroup_try_charge After previous patch, mem_cgroup_try_charge is not used by anyone, so we can remove it. 
Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 056cf82c0e86..8ae6ece8c962 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -40,8 +40,6 @@ struct mm_struct; extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ -extern int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr); extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t mask, struct mem_cgroup **ptr); extern void mem_cgroup_commit_charge_swapin(struct page *page, @@ -134,12 +132,6 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **ptr) -{ - return 0; -} - static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr) { -- cgit v1.2.3 From b5a84319a4343a0db753436fd8147e61eaafa7ea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:35 -0800 Subject: memcg: fix shmem's swap accounting Now, you can see following even when swap accounting is enabled. 1. Create Group 01, and 02. 2. allocate a "file" on tmpfs by a task under 01. 3. swap out the "file" (by memory pressure) 4. Read "file" from a task in group 02. 5. the charge of "file" is moved to group 02. This is not ideal behavior. This is because SwapCache which was loaded by read-ahead is not taken into account.. This is a patch to fix shmem's swapcache behavior. - remove mem_cgroup_cache_charge_swapin(). - Add SwapCache handler routine to mem_cgroup_cache_charge(). 
By this, shmem's file cache is charged at add_to_page_cache() with GFP_NOWAIT. - pass the page of swapcache to shrink_mem_cgroup. Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Paul Menage Cc: Li Zefan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++-- include/linux/swap.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ae6ece8c962..326f45c86530 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); +extern int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, @@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +static inline int mem_cgroup_shrink_usage(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 4ccca25d0f05..d30215578877 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -335,16 +335,8 @@ static inline void disable_swap_token(void) } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked); extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); #else -static inline -int mem_cgroup_cache_charge_swapin(struct page *page, - struct mm_struct *mm, gfp_t mask, bool locked) -{ - 
return 0; -} static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) { -- cgit v1.2.3 From 999cd8a450f8f93701669a61cac4d3b19eca07e8 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:36 -0800 Subject: cgroups: add a per-subsystem hierarchy_mutex These patches introduce new locking/refcount support for cgroups to reduce the need for subsystems to call cgroup_lock(). This will ultimately allow the atomicity of cgroup_rmdir() (which was removed recently) to be restored. These three patches give: 1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can use to prevent changes to its own cgroup tree 2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the memory controller 3/3 - introduce a css_tryget() function similar to the one recently proposed by Kamezawa, but avoiding spurious refcount failures in the event of a race between a css_tryget() and an unsuccessful cgroup_rmdir() Future patches will likely involve: - using hierarchy mutex in place of cgroup_lock() in more subsystems where appropriate - restoring the atomicity of cgroup_rmdir() with respect to cgroup_create() This patch: Add a hierarchy_mutex to the cgroup_subsys object that protects changes to the hierarchy observed by that subsystem. It is taken by the cgroup subsystem (in addition to cgroup_mutex) for the following operations: - linking a cgroup into that subsystem's cgroup tree - unlinking a cgroup from that subsystem's cgroup tree - moving the subsystem to/from a hierarchy (including across the bind() callback) Thus if the subsystem holds its own hierarchy_mutex, it can safely traverse its own hierarchy. 
Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 73d1c730c3c4..ce1c1f34c30c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -340,8 +340,23 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - struct cgroupfs_root *root; + /* + * Protects sibling/children links of cgroups in this + * hierarchy, plus protects which hierarchy (or none) the + * subsystem is a part of (i.e. root/sibling). To avoid + * potential deadlocks, the following operations should not be + * undertaken while holding any hierarchy_mutex: + * + * - allocating memory + * - initiating hotplug events + */ + struct mutex hierarchy_mutex; + /* + * Link to parent, and list entry in parent's children. + * Protected by this->hierarchy_mutex and cgroup_lock() + */ + struct cgroupfs_root *root; struct list_head sibling; }; -- cgit v1.2.3 From e7c5ec9193d32b9559a3bb8893ceedbda85201ff Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 7 Jan 2009 18:08:38 -0800 Subject: cgroups: add css_tryget() Add css_tryget(), that obtains a counted reference on a CSS. It is used in situations where the caller has a "weak" reference to the CSS, i.e. one that does not protect the cgroup from removal via a reference count, but would instead be cleaned up by a destroy() callback. css_tryget() will return true on success, or false if the cgroup is being removed. This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but with the difference that in the event of css_tryget() racing with a cgroup_rmdir(), css_tryget() will only return false if the cgroup really does get removed. 
This implementation is done by biasing css->refcnt, so that a refcnt of 1 means "releasable" and 0 means "released or releasing". In the event of a race, css_tryget() distinguishes between "released" and "releasing" by checking for the CSS_REMOVED flag in css->flags. Signed-off-by: Paul Menage Tested-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ce1c1f34c30c..e267e62827bb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -52,9 +52,9 @@ struct cgroup_subsys_state { * hierarchy structure */ struct cgroup *cgroup; - /* State maintained by the cgroup system to allow - * subsystems to be "busy". Should be accessed via css_get() - * and css_put() */ + /* State maintained by the cgroup system to allow subsystems + * to be "busy". Should be accessed via css_get(), + * css_tryget() and and css_put(). 
*/ atomic_t refcnt; @@ -64,11 +64,14 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { CSS_ROOT, /* This CSS is the root of the subsystem */ + CSS_REMOVED, /* This CSS is dead */ }; /* - * Call css_get() to hold a reference on the cgroup; - * + * Call css_get() to hold a reference on the css; it can be used + * for a reference obtained via: + * - an existing ref-counted reference to the css + * - task->cgroups for a locked task */ static inline void css_get(struct cgroup_subsys_state *css) @@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css) if (!test_bit(CSS_ROOT, &css->flags)) atomic_inc(&css->refcnt); } + +static inline bool css_is_removed(struct cgroup_subsys_state *css) +{ + return test_bit(CSS_REMOVED, &css->flags); +} + +/* + * Call css_tryget() to take a reference on a css if your existing + * (known-valid) reference isn't already ref-counted. Returns false if + * the css has been destroyed. + */ + +static inline bool css_tryget(struct cgroup_subsys_state *css) +{ + if (test_bit(CSS_ROOT, &css->flags)) + return true; + while (!atomic_inc_not_zero(&css->refcnt)) { + if (test_bit(CSS_REMOVED, &css->flags)) + return false; + } + return true; +} + /* * css_put() should be called to release a reference taken by - * css_get() + * css_get() or css_tryget() */ extern void __css_put(struct cgroup_subsys_state *css); -- cgit v1.2.3 From 6af866af34a96fed24a55979a78b6f73bd4e8e87 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Jan 2009 18:08:45 -0800 Subject: cpuset: remove remaining pointers to cpumask_t Impact: cleanups, use new cpumask API Final trivial cleanups: mainly s/cpumask_t/struct cpumask Note there is a FIXME in generate_sched_domains(). A future patch will change struct cpumask *doms to struct cpumask *doms[]. (I suppose Rusty will do this.) 
Signed-off-by: Li Zefan Cc: Ingo Molnar Cc: Rusty Russell Acked-by: Mike Travis Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 51ea2bdea0f9..90c6074a36ca 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,8 +20,9 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask); -extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask); +extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); +extern void cpuset_cpus_allowed_locked(struct task_struct *p, + struct cpumask *mask); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask) +static inline void cpuset_cpus_allowed(struct task_struct *p, + struct cpumask *mask) { *mask = cpu_possible_map; } static inline void cpuset_cpus_allowed_locked(struct task_struct *p, - cpumask_t *mask) + struct cpumask *mask) { *mask = cpu_possible_map; } -- cgit v1.2.3 From f9fb860f67b9542cd78d1558dec7058092b57d8e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2009 18:08:46 -0800 Subject: pid: implement ns_of_pid A current problem with the pid namespace is that it is easy to do pid related work after exit_task_namespaces which drops the nsproxy pointer. 
However if we are doing pid namespace related work we are always operating on some struct pid which retains the pid_namespace pointer of the pid namespace it was allocated in. So provide ns_of_pid which allows us to find the pid namespace a pid was allocated in. Using this we have the needed infrastructure to do pid namespace related work at anytime we have a struct pid, removing the chance of accidentally having a NULL pointer dereference when accessing current->nsproxy. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index bb206c56d1f0..49f1c2f66e95 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -122,6 +122,24 @@ int next_pidmap(struct pid_namespace *pid_ns, int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); +/* + * ns_of_pid() returns the pid namespace in which the specified pid was + * allocated. + * + * NOTE: + * ns_of_pid() is expected to be called for a process (task) that has + * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid + * is expected to be non-NULL. If @pid is NULL, caller should handle + * the resulting NULL pid-ns. + */ +static inline struct pid_namespace *ns_of_pid(struct pid *pid) +{ + struct pid_namespace *ns = NULL; + if (pid) + ns = pid->numbers[pid->level].ns; + return ns; +} + /* * the helpers to get the pid's id seen from different namespaces * -- cgit v1.2.3 From 61bce0f1371cfff497fe85594fd39d1a0b15ebe1 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 7 Jan 2009 18:08:49 -0800 Subject: pid: generalize task_active_pid_ns Currently task_active_pid_ns is not safe to call after a task becomes a zombie and exit_task_namespaces is called, as nsproxy becomes NULL. By reading the pid namespace from the pid of the task we can trivially solve this problem at the cost of one extra memory read in what should be the same cacheline as we read the namespace from. When moving things around I have made task_active_pid_ns out of line because keeping it in pid_namespace.h would require adding includes of pid.h and sched.h that I don't think we want. This change does make task_active_pid_ns unsafe to call during copy_process until we attach a pid on the task_struct which seems to be a reasonable trade off. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: Bastian Blank Cc: Pavel Emelyanov Cc: Nadia Derbey Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index d82fe825d62f..38d10326246a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns) } #endif /* CONFIG_PID_NS */ -static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) -{ - return tsk->nsproxy->pid_ns; -} - +extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pidmap_init(void); -- cgit v1.2.3 From f06295b44c296c8fb08823a3118468ae343b60f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Jan 2009 18:08:52 -0800 Subject: ELF: implement AT_RANDOM for glibc PRNG seeding While discussing[1] the need for glibc to have access to random bytes during program load, it seems that an earlier attempt 
to implement AT_RANDOM got stalled. This implements a random 16 byte string, available to every ELF program via a new auxv AT_RANDOM vector. [1] http://sourceware.org/ml/libc-alpha/2008-10/msg00006.html Ulrich said: glibc needs right after startup a bit of random data for internal protections (stack canary etc). What is now in upstream glibc is that we always unconditionally open /dev/urandom, read some data, and use it. For every process startup. That's slow. ... The solution is to provide a limited amount of random data to the starting process in the aux vector. I suggested 16 bytes and this is what the patch implements. If we need only 16 bytes or less we use the data directly. If we need more we'll use the 16 bytes to seed a PRNG. This avoids the costly /dev/urandom use and it allows the kernel to use the most adequate source of random data for this purpose. It might not be the same pool as that for /dev/urandom. Concerns were expressed about the depletion of the randomness pool. But this patch doesn't make the situation worse, it doesn't deplete entropy more than happens now. Signed-off-by: Kees Cook Cc: Jakub Jelinek Cc: Andi Kleen Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auxvec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index d7afa9dd6635..f3b5d4e3a2ac 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -23,16 +23,16 @@ #define AT_PLATFORM 15 /* string identifying CPU for optimizations */ #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ #define AT_CLKTCK 17 /* frequency at which times() increments */ - +/* AT_* values 18 through 22 are reserved */ #define AT_SECURE 23 /* secure mode boolean */ - #define AT_BASE_PLATFORM 24 /* string identifying real platform, may * differ from AT_PLATFORM.
*/ +#define AT_RANDOM 25 /* address of 16 random bytes */ #define AT_EXECFN 31 /* filename of program */ #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif -- cgit v1.2.3 From 91f68b7359144aa40bb9668124543d15284750b4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 7 Jan 2009 18:09:12 -0800 Subject: generic swap(): introduce global macro swap(a, b) There have been some local definitions of swap(), it's time to replace them all with a uniform one. Signed-off-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6b8e2027165e..343df9ef2412 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte) __val = __val < __min ? __min: __val; \ __val > __max ? __max: __val; }) + +/* + * swap - swap value of @a and @b + */ +#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; }) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. -- cgit v1.2.3 From 859cb7f2a4244ea6da206d3fe9cc8a6810947a68 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Thu, 8 Jan 2009 17:55:03 +0000 Subject: leds: Add suspend/resume to the core class Add suspend/resume to the core class and remove all the now unneeded code from various drivers. Originally the class code couldn't support suspend/resume but since class_device can there is no reason for each driver doing its own suspend/resume anymore. 
--- include/linux/leds.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 3c1a8ce6a5ea..24489da701e3 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -32,7 +32,10 @@ struct led_classdev { int brightness; int flags; + /* Lower 16 bits reflect status */ #define LED_SUSPENDED (1 << 0) + /* Upper 16 bits reflect control information */ +#define LED_CORE_SUSPENDRESUME (1 << 16) /* Set LED brightness level */ /* Must not sleep, use a workqueue if needed */ -- cgit v1.2.3 From 69279fb9a95051971ac03e558c4d46e7ba84ab3a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:41 +0000 Subject: regulator: Clean up kerneldoc warnings Remove kerneldoc warnings that don't relate to missing documentation, mostly by renaming parameters in the documentation to match their actual names. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/consumer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index afdc4558bb94..801bf77ff4e2 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -104,10 +104,10 @@ struct regulator; /** * struct regulator_bulk_data - Data used for bulk regulator operations. * - * @supply The name of the supply. Initialised by the user before - * using the bulk regulator APIs. - * @consumer The regulator consumer for the supply. This will be managed - * by the bulk API. + * @supply: The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. 
This -- cgit v1.2.3 From c8e7e4640facbe99d10a6e262523b25be129b9b9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 31 Dec 2008 12:52:42 +0000 Subject: regulator: Add missing kerneldoc This is only the documentation that the kerneldoc system warns about. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 40 +++++++++++++++++++++++++++++++++++++- include/linux/regulator/machine.h | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index e37d80561985..84c3737c2d26 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -24,7 +24,37 @@ struct regulator_init_data; /** * struct regulator_ops - regulator operations. * - * This struct describes regulator operations. + * This struct describes regulator operations which can be implemented by + * regulator chip drivers. + * + * @enable: Enable the regulator. + * @disable: Disable the regulator. + * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * + * @set_voltage: Set the voltage for the regulator within the range specified. + * The driver should select the voltage closest to min_uV. + * @get_voltage: Return the currently configured voltage for the regulator. + * + * @set_current: Set the current for the regulator within the range specified. + * The driver should select the current closest to min_uA. + * @get_current: Return the currently configured current for the regulator. + * + * @set_current_limit: Configure a limit for a current-limited regulator. + * @get_current_limit: Get the limit for a current-limited regulator. + * + * @set_mode: Set the operating mode for the regulator. + * @get_mode: Get the current operating mode for the regulator. + * @get_optimum_mode: Get the most efficient operating mode for the regulator + * when running with the specified parameters. 
+ * + * @set_suspend_voltage: Set the voltage for the regulator when the system + * is suspended. + * @set_suspend_enable: Mark the regulator as enabled when the system is + * suspended. + * @set_suspend_disable: Mark the regulator as disabled when the system is + * suspended. + * @set_suspend_mode: Set the operating mode for the regulator when the + * system is suspended. */ struct regulator_ops { @@ -75,6 +105,14 @@ enum regulator_type { /** * struct regulator_desc - Regulator descriptor * + * Each regulator registered with the core is described with a structure of + * this type. + * + * @name: Identifying name for the regulator. + * @id: Numerical identifier for the regulator. + * @ops: Regulator operations table. + * @type: Indicates if the regulator is a voltage or current regulator. + * @owner: Module providing the regulator, used for refcounting. */ struct regulator_desc { const char *name; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c6d69331a81e..3794773b23d2 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -44,6 +44,10 @@ struct regulator; * struct regulator_state - regulator state during low power syatem states * * This describes a regulators state during a system wide low power state. + * + * @uV: Operating voltage during suspend. + * @mode: Operating mode during suspend. + * @enabled: Enabled during suspend. */ struct regulator_state { int uV; /* suspend voltage */ @@ -55,6 +59,30 @@ struct regulator_state { * struct regulation_constraints - regulator operating constraints. * * This struct describes regulator and board/machine specific constraints. + * + * @name: Descriptive name for the constraints, used for display purposes. + * + * @min_uV: Smallest voltage consumers may set. + * @max_uV: Largest voltage consumers may set. + * + * @min_uA: Smallest consumers consumers may set. + * @max_uA: Largest current consumers may set. 
+ * + * @valid_modes_mask: Mask of modes which may be configured by consumers. + * @valid_ops_mask: Operations which may be performed by consumers. + * + * @always_on: Set if the regulator should never be disabled. + * @boot_on: Set if the regulator is enabled when the system is initially + * started. + * @apply_uV: Apply the voltage constraint when initialising. + * + * @input_uV: Input voltage for regulator when supplied by another regulator. + * + * @state_disk: State for regulator when system is suspended in disk mode. + * @state_mem: State for regulator when system is suspended in mem mode. + * @state_standby: State for regulator when system is suspended in standby + * mode. + * @initial_state: Suspend state to set by default. */ struct regulation_constraints { @@ -93,6 +121,9 @@ struct regulation_constraints { * struct regulator_consumer_supply - supply -> device mapping * * This maps a supply name to a device. + * + * @dev: Device structure for the consumer. + * @supply: Name for the supply. */ struct regulator_consumer_supply { struct device *dev; /* consumer */ @@ -103,6 +134,16 @@ struct regulator_consumer_supply { * struct regulator_init_data - regulator platform initialisation data. * * Initialisation constraints, our supply and consumers supplies. + * + * @supply_regulator_dev: Parent regulator (if any). + * + * @constraints: Constraints. These must be specified for the regulator to + * be usable. + * @num_consumer_supplies: Number of consumer device supplies. + * @consumer_supplies: Consumer device supply configuration. + * + * @regulator_init: Callback invoked when the regulator has been registered. + * @driver_data: Data passed to regulator_init. 
*/ struct regulator_init_data { struct device *supply_regulator_dev; /* or NULL for LINE */ -- cgit v1.2.3 From 0ba4887c6329043d6cee5b5b477cfe50c2b57674 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jan 2009 11:50:23 -0800 Subject: regulator: fix kernel-doc warnings Fix kernel-doc warnings in regulator/driver.h: Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'set_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'get_current' description in 'regulator_ops' Warning(linux-next-20090108//include/linux/regulator/driver.h:124): No description found for parameter 'irq' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 84c3737c2d26..2dae05705f13 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -29,16 +29,12 @@ struct regulator_init_data; * * @enable: Enable the regulator. * @disable: Disable the regulator. - * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise. + * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise. * * @set_voltage: Set the voltage for the regulator within the range specified. * The driver should select the voltage closest to min_uV. * @get_voltage: Return the currently configured voltage for the regulator. * - * @set_current: Set the current for the regulator within the range specified. - * The driver should select the current closest to min_uA. - * @get_current: Return the currently configured current for the regulator. - * * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the limit for a current-limited regulator. 
* @@ -111,6 +107,7 @@ enum regulator_type { * @name: Identifying name for the regulator. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. + * @irq: Interrupt number for the regulator. * @type: Indicates if the regulator is a voltage or current regulator. * @owner: Module providing the regulator, used for refcounting. */ -- cgit v1.2.3 From f4f6bda00fc6bf995a35d8246db45aacaa9b3f09 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Dec 2008 08:48:52 +0000 Subject: backlight: add support for Toppoly TDO35S series to tdo24m lcd driver Signed-off-by: Mike Rapoport Acked-by: Eric Miao Signed-off-by: Richard Purdie --- include/linux/spi/tdo24m.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/spi/tdo24m.h (limited to 'include/linux') diff --git a/include/linux/spi/tdo24m.h b/include/linux/spi/tdo24m.h new file mode 100644 index 000000000000..7572d4e1fe76 --- /dev/null +++ b/include/linux/spi/tdo24m.h @@ -0,0 +1,13 @@ +#ifndef __TDO24M_H__ +#define __TDO24M_H__ + +enum tdo24m_model { + TDO24M, + TDO35S, +}; + +struct tdo24m_platform_data { + enum tdo24m_model model; +}; + +#endif /* __TDO24M_H__ */ -- cgit v1.2.3 From 0c3573f19d135d718264e38c46597295bd6154b7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:05 +1100 Subject: md: use sysfs_notify_dirent to notify changes to md/sync_action. There is no compelling need for this, but sysfs_notify_dirent is a nicer interface and the change is good for consistency. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8fc909ef6787..663803eaf0de 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -244,6 +244,7 @@ struct mddev_s struct sysfs_dirent *sysfs_state; /* handle for 'array_state' * file in sysfs. 
*/ + struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ -- cgit v1.2.3 From 019c4e2f3e02aac4b44003913b54ca4b332e4371 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0: Represent device offset in sectors. Rename zone->dev_offset to zone->dev_start to make sure all users have been converted. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 1b2dda035f8e..61c3d29dc158 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -6,7 +6,7 @@ struct strip_zone { sector_t zone_offset; /* Zone offset in md_dev */ - sector_t dev_offset; /* Zone offset in real dev */ + sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ -- cgit v1.2.3 From 6199d3db0fc34f8ada895879d04a353a6ae632bc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent zone->zone_offset in sectors. For the same reason as in the previous patch, rename it from zone_offset to zone_start. 
Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 61c3d29dc158..eaf4f6ac55f6 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -5,7 +5,7 @@ struct strip_zone { - sector_t zone_offset; /* Zone offset in md_dev */ + sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ -- cgit v1.2.3 From 83838ed87898e0a8ff8dbf001e54e6c017f0a011 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent the size of strip zones in sectors. This completes the block -> sector conversion of struct strip_zone. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index eaf4f6ac55f6..c12521d027e2 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -7,7 +7,7 @@ struct strip_zone { sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t size; /* Zone size */ + sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ }; -- cgit v1.2.3 From ccacc7d2cf03114a24ab903f710118e9e5d43273 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: raid0: make hash_spacing and preshift sector-based. 
This patch renames the hash_spacing and preshift members of struct raid0_private_data to spacing and sector_shift respectively and changes the semantics as follows: We always have spacing = 2 * hash_spacing. In case sizeof(sector_t) > sizeof(u32) we also have sector_shift = preshift + 1 while sector_shift = preshift = 0 otherwise. Note that the values of nb_zone and zone are unaffected by these changes because in the sector_div() preceding the assignment of these two variables both arguments double. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/raid0.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index c12521d027e2..fd42aa87c391 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -19,8 +19,8 @@ struct raid0_private_data mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - sector_t hash_spacing; - int preshift; /* shift this before divide by hash_spacing */ + sector_t spacing; + int sector_shift; /* shift this before divide by spacing */ }; typedef struct raid0_private_data raid0_conf_t; -- cgit v1.2.3 From 159ec1fc060ab22b157a62364045f5e98749c4d3 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: use list_for_each_entry macro directly The rdev_for_each macro defined in include/linux/raid/md_k.h is identical to list_for_each_entry_safe from include/linux/list.h; it should be defined to use list_for_each_entry_safe, instead of reinventing the wheel. But some calls to each_entry_safe don't really need a safe version; a direct list_for_each_entry is enough, and this saves a temp variable (tmp) in every function that used rdev_for_each. In this patch, most rdev_for_each loops are replaced by list_for_each_entry, saving many tmp vars in total; only in the other situations, which call list_del to delete an entry, is the safe version used.
Signed-off-by: Cheng Renquan Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 663803eaf0de..8f9a54c1fb0e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -335,17 +335,14 @@ static inline char * mdname (mddev_t * mddev) * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ -#define rdev_for_each_list(rdev, tmp, list) \ - \ - for ((tmp) = (list).next; \ - (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ - (tmp) = (tmp)->next, (tmp)->prev != &(list) \ - ; ) +#define rdev_for_each_list(rdev, tmp, head) \ + list_for_each_entry_safe(rdev, tmp, head, same_set) + /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, tmp, mddev) \ - rdev_for_each_list(rdev, tmp, (mddev)->disks) + list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) -- cgit v1.2.3 From cd2ac9321c26dc7a76455cd2a4df89123fa2b73e Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: need another print_sb for mdp_superblock_1 md_print_devices is called in two code path: MD_BUG(...), and md_ioctl with PRINT_RAID_DEBUG. 
it will dump out all in use md devices information; However, it wrongly processed two types of superblock in one: The header file has defined two types of superblock, struct mdp_superblock_s (typedefed with mdp_super_t) according to md with metadata 0.90, and struct mdp_superblock_1 according to md with metadata 1.0 and later, These two types of superblock are very different, The md_print_devices code processed them both in mdp_super_t, that would lead to wrong information dump like: [ 6742.345877] [ 6742.345887] md: ********************************** [ 6742.345890] md: * * [ 6742.345892] md: ********************************** [ 6742.345896] md1: [ 6742.345907] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 6742.345909] md: rdev superblock: [ 6742.345914] md: SB: (V:0.90.0) ID:<42ef13c7.598c059a.5f9f1645.801e9ee6> CT:4919856d [ 6742.345918] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 6742.345922] md: UT:4919856d ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:b7992907 E:00000001 [ 6742.345924] D 0: DISK [ 6742.345930] D 1: DISK [ 6742.345933] D 2: DISK [ 6742.345937] D 3: DISK [ 6742.345942] md: THIS: DISK ... [ 6742.346058] md0: [ 6742.346067] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 6742.346070] md: rdev superblock: [ 6742.346073] md: SB: (V:1.0.0) ID:<369aad81.00000000.00000000.00000000> CT:9a322a9c [ 6742.346077] md: L-1507699579 S976570180 ND:48 RD:0 md0 LO:65536 CS:196610 [ 6742.346081] md: UT:00000018 ST:0 AD:131048 WD:0 FD:8 SD:0 CSUM:00000000 E:00000000 [ 6742.346084] D 0: DISK [ 6742.346089] D 1: DISK [ 6742.346092] D 2: DISK [ 6742.346096] D 3: DISK [ 6742.346102] md: THIS: DISK ...
[ 6742.346219] md: ********************************** [ 6742.346221] Here md1 is metadata 0.90.0, and md0 is metadata 1.2 After some more code to distinguish these two types of superblock, in this patch, it will generate dump information like: [ 7906.755790] [ 7906.755799] md: ********************************** [ 7906.755802] md: * * [ 7906.755804] md: ********************************** [ 7906.755808] md1: [ 7906.755819] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 7906.755821] md: rdev superblock (MJ:0): [ 7906.755826] md: SB: (V:0.90.0) ID:<3fca7a0d.a612bfed.5f9f1645.801e9ee6> CT:491989f3 [ 7906.755830] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 7906.755834] md: UT:491989f3 ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:00fb52ad E:00000001 [ 7906.755836] D 0: DISK [ 7906.755842] D 1: DISK [ 7906.755845] D 2: DISK [ 7906.755849] D 3: DISK [ 7906.755855] md: THIS: DISK ... [ 7906.755972] md0: [ 7906.755981] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 7906.755984] md: rdev superblock (MJ:1): [ 7906.755989] md: SB: (V:1) (F:0) Array-ID:<5fbcf158:55aa:5fbe:9a79:1e939880dcbd> [ 7906.755990] md: Name: "DG5:0" CT:1226410480 [ 7906.755998] md: L5 SZ130944 RD:4 LO:2 CS:128 DO:24 DS:131048 SO:8 RO:0 [ 7906.755999] md: Dev:00000003 UUID: 9194d744:87f7:a448:85f2:7497b84ce30a [ 7906.756001] md: (F:0) UT:1226410480 Events:0 ResyncOffset:-1 CSUM:0dbcd829 [ 7906.756003] md: (MaxDev:384) ... [ 7906.756113] md: ********************************** [ 7906.756116] this md0 (metadata 1.2) information dumping is exactly according to struct mdp_superblock_1. 
Signed-off-by: Cheng Renquan Cc: Neil Brown Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: NeilBrown --- include/linux/raid/md_p.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8b4de4a41ff1..9491026afe66 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) { return (ev<<32)| sb->events_lo; } +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + /* * The version-1 superblock : * All numeric fields are little-endian. -- cgit v1.2.3 From d3374825ce57ba2214d375023979f6197ccc1385 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: make devices disappear when they are no longer needed. Currently md devices, once created, never disappear until the module is unloaded. This is essentially because the gendisk holds a reference to the mddev, and the mddev holds a reference to the gendisk, this a circular reference. If we drop the reference from mddev to gendisk, then we need to ensure that the mddev is destroyed when the gendisk is destroyed. However it is not possible to hook into the gendisk destruction process to enable this. So we drop the reference from the gendisk to the mddev and destroy the gendisk when the mddev gets destroyed. However this has a complication. Between the call __blkdev_get->get_gendisk->kobj_lookup->md_probe and the call __blkdev_get->md_open there is no obvious way to hold a reference on the mddev any more, so unless something is done, it will disappear and gendisk will be destroyed prematurely. Also, once we decide to destroy the mddev, there will be an unlockable moment before the gendisk is unlinked (blk_unregister_region) during which a new reference to the gendisk can be created. We need to ensure that this reference can not be used. i.e. the ->open must fail. 
So: 1/ in md_probe we set a flag in the mddev (hold_active) which indicates that the array should be treated as active, even though there are no references, and no appearance of activity. This is cleared by md_release when the device is closed if it is no longer needed. This ensures that the gendisk will survive between md_probe and md_open. 2/ In md_open we check if the mddev we expect to open matches the gendisk that we did open. If there is a mismatch we return -ERESTARTSYS and modify __blkdev_get to retry from the top in that case. In the -ERESTARTSYS sys case we make sure to wait until the old gendisk (that we succeeded in opening) is really gone so we loop at most once. Some udev configurations will always open an md device when it first appears. If we allow an md device that was just created by an open to disappear on an immediate close, then this can race with such udev configurations and result in an infinite loop of the device being opened and closed, then re-opened due to the 'ADD' event from the first open, and then closed and so on. So we make sure an md device, once created by an open, remains active at least until some md 'ioctl' has been made on it. This means that all normal usage of md devices will allow them to disappear promptly when not needed, but the worst that an incorrect usage will do is cause an inactive md device to be left in existence (it can easily be removed). As an array can be stopped by writing to a sysfs attribute echo clear > /sys/block/mdXXX/md/array_state we need to use scheduled work for deleting the gendisk and other kobjects. This allows us to wait for any pending gendisk deletion to complete by simply calling flush_scheduled_work().
Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8f9a54c1fb0e..e3d17c7f954e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -137,6 +137,8 @@ struct mddev_s struct gendisk *gendisk; struct kobject kobj; + int hold_active; +#define UNTIL_IOCTL 1 /* Superblock information */ int major_version, @@ -246,6 +248,8 @@ struct mddev_s */ struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ + struct work_struct del_work; /* used for delayed sysfs removal */ + spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ -- cgit v1.2.3 From efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: Allow md devices to be created by name. Using sequential numbers to identify md devices is somewhat artificial. Using names can be a lot more user-friendly. Also, creating md devices by opening the device special file is a bit awkward. So this patch provides a new option for creating and naming devices. Writing a name such as "md_home" to /sys/module/md_mod/parameters/new_array will cause an array with that name to be created. It will appear in /sys/block/ /proc/partitions and /proc/mdstat as 'md_home'. It will have an arbitrary minor number allocated. md devices that are created by an open are destroyed on the last close when the device is inactive. For named md devices, they will not be destroyed until the array is explicitly stopped, either with the STOP_ARRAY ioctl or by writing 'clear' to /sys/block/md_XXXX/md/array_state. The name of the array must start 'md_' to avoid conflict with other devices.
Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e3d17c7f954e..dac4217194b8 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -139,6 +139,7 @@ struct mddev_s struct kobject kobj; int hold_active; #define UNTIL_IOCTL 1 +#define UNTIL_STOP 2 /* Superblock information */ int major_version, -- cgit v1.2.3 From 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:11 +1100 Subject: md: don't retry recovery of raid1 that fails due to error on source drive. If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop. So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added in case the problem wasn't actually a media error. 
Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dac4217194b8..9743e4dbc918 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -218,6 +218,9 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; + int recovery_disabled; /* if we detect that recovery + * will always fail, set this + * so we don't loop trying */ int in_sync; /* know to not need resync */ struct mutex reconfig_mutex; -- cgit v1.2.3 From 871af1210f13966ab911ed2166e4ab2ce775b99d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 5 Jan 2009 14:16:39 +0000 Subject: libata: Add 32bit PIO support This matters for some controllers and in one or two cases almost doubles PIO performance. Add a bmdma32 operations set we can inherit and activate it for some controllers Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3449de597eff..4f7c8fb4d3fe 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap); extern const struct ata_port_operations ata_sff_port_ops; extern const struct ata_port_operations ata_bmdma_port_ops; +extern const struct ata_port_operations ata_bmdma32_port_ops; /* PIO only, sg_tablesize and dma_boundary limits can be removed */ #define ATA_PIO_SHT(drv_name) \ @@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); +extern unsigned int ata_sff_data_xfer32(struct ata_device *dev, + unsigned char *buf, unsigned int buflen, int rw); extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned 
char *buf, unsigned int buflen, int rw); extern u8 ata_sff_irq_on(struct ata_port *ap); -- cgit v1.2.3 From fefae48bf8caab7d56ee4f8181f06602cf73d29e Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 8 Jan 2009 19:21:27 +0100 Subject: [MTD] CFI: remove major/minor version check for command set 0x0002 The NOR Flash memory K8P2815UQB from Samsung uses the major version number '0'. Add a quirk to cope with it. Signed-off-by: Wolfgang Grandegger Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 00e2b575021f..88d3d8fbf9f2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -520,6 +520,7 @@ struct cfi_fixup { #define CFI_MFR_AMD 0x0001 #define CFI_MFR_ATMEL 0x001F +#define CFI_MFR_SAMSUNG 0x00EC #define CFI_MFR_ST 0x0020 /* STMicroelectronics */ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); -- cgit v1.2.3 From 8dd2f36f317569665e454268a2677cfba3e848f1 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 2 Aug 2008 22:51:52 +0200 Subject: mISDN: Add feature via MISDN_CTRL_FILL_EMPTY to fill fifo if empty This prevents underrun of fifo when filled and in case of an underrun it prevents subsequent underruns due to jitter. Improve dsp, so buffers are kept filled with a certain delay, so moderate jitter will not cause underrun all the time -> the audio quality is highly improved. tones are not interrupted by gaps anymore, except when CPU is stalling or in high load. 
Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 25 +++++++++++++------------ include/linux/mISDNif.h | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index e794dfb87504..9384b92dfc65 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -57,20 +57,21 @@ #define FLG_L2DATA 14 /* channel use L2 DATA primitivs */ #define FLG_ORIGIN 15 /* channel is on origin site */ /* channel specific stuff */ +#define FLG_FILLEMPTY 16 /* fill fifo on first frame (empty) */ /* arcofi specific */ -#define FLG_ARCOFI_TIMER 16 -#define FLG_ARCOFI_ERROR 17 +#define FLG_ARCOFI_TIMER 17 +#define FLG_ARCOFI_ERROR 18 /* isar specific */ -#define FLG_INITIALIZED 16 -#define FLG_DLEETX 17 -#define FLG_LASTDLE 18 -#define FLG_FIRST 19 -#define FLG_LASTDATA 20 -#define FLG_NMD_DATA 21 -#define FLG_FTI_RUN 22 -#define FLG_LL_OK 23 -#define FLG_LL_CONN 24 -#define FLG_DTMFSEND 25 +#define FLG_INITIALIZED 17 +#define FLG_DLEETX 18 +#define FLG_LASTDLE 19 +#define FLG_FIRST 20 +#define FLG_LASTDATA 21 +#define FLG_NMD_DATA 22 +#define FLG_FTI_RUN 23 +#define FLG_LL_OK 24 +#define FLG_LL_CONN 25 +#define FLG_DTMFSEND 26 /* workq events */ #define FLG_RECVQUEUE 30 diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 8f2d60da04e7..74c903cd7a0a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -312,6 +312,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_SETPEER 0x0040 #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 +#define MISDN_CTRL_FILL_EMPTY 0x0200 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.3 From 8b6015f736125050722dbe59c4f943e78cd626f0 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Tue, 12 Aug 2008 10:12:09 +0200 Subject: mISDN: Added an ioctl to change the device name To get 
persistent device names with hotplug we need to rename devices sometime. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 74c903cd7a0a..be09476ed854 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -36,8 +36,8 @@ * - should be incremented on every checkin */ #define MISDN_MAJOR_VERSION 1 -#define MISDN_MINOR_VERSION 0 -#define MISDN_RELEASE 19 +#define MISDN_MINOR_VERSION 1 +#define MISDN_RELEASE 20 /* primitives for information exchange * generell format @@ -255,16 +255,6 @@ struct sockaddr_mISDN { unsigned char tei; }; -/* timer device ioctl */ -#define IMADDTIMER _IOR('I', 64, int) -#define IMDELTIMER _IOR('I', 65, int) -/* socket ioctls */ -#define IMGETVERSION _IOR('I', 66, int) -#define IMGETCOUNT _IOR('I', 67, int) -#define IMGETDEVINFO _IOR('I', 68, int) -#define IMCTRLREQ _IOR('I', 69, int) -#define IMCLEAR_L2 _IOR('I', 70, int) - struct mISDNversion { unsigned char major; unsigned char minor; @@ -281,6 +271,23 @@ struct mISDN_devinfo { char name[MISDN_MAX_IDLEN]; }; +struct mISDN_devrename { + u_int id; + char name[MISDN_MAX_IDLEN]; /* new name */ +}; + +/* timer device ioctl */ +#define IMADDTIMER _IOR('I', 64, int) +#define IMDELTIMER _IOR('I', 65, int) + +/* socket ioctls */ +#define IMGETVERSION _IOR('I', 66, int) +#define IMGETCOUNT _IOR('I', 67, int) +#define IMGETDEVINFO _IOR('I', 68, int) +#define IMCTRLREQ _IOR('I', 69, int) +#define IMCLEAR_L2 _IOR('I', 70, int) +#define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) + static inline int test_channelmap(u_int nr, u_char *map) { -- cgit v1.2.3 From 837468d135dcc49cdabc9fa92fc9550479f60704 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:04:33 +0200 Subject: mISDN: Use struct device name field struct device already has a 'name' 
member, use it. Signed-off-by: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index be09476ed854..a59febb6143a 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -431,7 +431,6 @@ struct mISDN_sock { struct mISDNdevice { struct mISDNchannel D; u_int id; - char name[MISDN_MAX_IDLEN]; u_int Dprotocols; u_int Bprotocols; u_int nrbchan; -- cgit v1.2.3 From 1f28fa19d34c0d9186f274e61e4b3dcfc6428c5c Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Wed, 3 Sep 2008 15:17:45 +0200 Subject: mISDN: Add E-Channel logging features New prim PH_DATA_E_IND. - all E-ch frames are indicated by recv_Echannel(), which pushes E-Channel frames into dch's rqueue - if dchannel is opened with channel nr 0, no E-Channel logging is requested - if dchannel is opened with channel nr 1, E-Channel logging is requested. if layer1 does not support that, -EINVAL in return is appropriate Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 1 + include/linux/mISDNif.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 9384b92dfc65..97ffdc1d3442 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -184,6 +184,7 @@ extern void queue_ch_frame(struct mISDNchannel *, u_int, extern int dchannel_senddata(struct dchannel *, struct sk_buff *); extern int bchannel_senddata(struct bchannel *, struct sk_buff *); extern void recv_Dchannel(struct dchannel *); +extern void recv_Echannel(struct dchannel *, struct dchannel *); extern void recv_Bchannel(struct bchannel *); extern void recv_Dchannel_skb(struct dchannel *, struct sk_buff *); extern void recv_Bchannel_skb(struct bchannel *, struct sk_buff *); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index a59febb6143a..f75d596c5316 100644 --- 
a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -80,6 +80,7 @@ #define PH_DEACTIVATE_IND 0x0202 #define PH_DEACTIVATE_CNF 0x4202 #define PH_DATA_IND 0x2002 +#define PH_DATA_E_IND 0x3002 #define MPH_ACTIVATE_IND 0x0502 #define MPH_DEACTIVATE_IND 0x0602 #define MPH_INFORMATION_IND 0x0702 -- cgit v1.2.3 From 3bd69ad197a4a3d0085a5dc3b5796111bf176b12 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sat, 6 Sep 2008 09:03:46 +0200 Subject: mISDN: Add ISDN sample clock API to mISDN core Add ISDN sample clock API to mISDN core (new file clock.c) hfcmulti and mISDNdsp use clock API. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index f75d596c5316..364f1018f0d1 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -371,6 +371,7 @@ struct mISDN_ctrl_req { #define DEBUG_L2_TEI 0x00100000 #define DEBUG_L2_TEIFSM 0x00200000 #define DEBUG_TIMER 0x01000000 +#define DEBUG_CLOCK 0x02000000 #define mISDN_HEAD_P(s) ((struct mISDNhead *)&s->cb[0]) #define mISDN_HEAD_PRIM(s) (((struct mISDNhead *)&s->cb[0])->prim) @@ -384,6 +385,7 @@ struct mISDN_ctrl_req { struct mISDNchannel; struct mISDNdevice; struct mISDNstack; +struct mISDNclock; struct channel_req { u_int protocol; @@ -460,6 +462,16 @@ struct mISDNstack { #endif }; +typedef int (clockctl_func_t)(void *, int); + +struct mISDNclock { + struct list_head list; + char name[64]; + int pri; + clockctl_func_t *ctl; + void *priv; +}; + /* global alloc/queue functions */ static inline struct sk_buff * @@ -510,8 +522,13 @@ extern int mISDN_register_device(struct mISDNdevice *, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); +extern struct mISDNclock *mISDN_register_clock(char *, int, 
clockctl_func_t *, + void *); +extern void mISDN_unregister_clock(struct mISDNclock *); extern void set_channel_address(struct mISDNchannel *, u_int, u_int); +extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern unsigned short mISDN_clock_get(void); #endif /* __KERNEL__ */ #endif /* mISDNIF_H */ -- cgit v1.2.3 From 02282eee56b75a35e6bbc42cc34c9005eb1653f4 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Mon, 8 Sep 2008 15:57:48 +0200 Subject: mISDN: Add ISDN_P_TE_UP0 / ISDN_P_NT_UP0 - new layer1 protocols for UP0 bus - helper #defines to test for TE/NT/S0/E1/UP0 Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 364f1018f0d1..7f65aa0c1cc5 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -200,6 +200,18 @@ #define ISDN_P_NT_S0 0x02 #define ISDN_P_TE_E1 0x03 #define ISDN_P_NT_E1 0x04 +#define ISDN_P_TE_UP0 0x05 +#define ISDN_P_NT_UP0 0x06 + +#define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ + (p == ISDN_P_TE_UP0)) +#define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ + (p == ISDN_P_NT_UP0)) +#define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) +#define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) +#define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) + + #define ISDN_P_LAPD_TE 0x10 #define ISDN_P_LAPD_NT 0x11 -- cgit v1.2.3 From 1b4d33121f1d991f6ae226cc3333428ff87627bb Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Sun, 14 Sep 2008 12:30:18 +0200 Subject: mISDN: Fix deactivation, if peer IP is removed from l1oip instance. Added GETPEER operation. Socket now checks if device is already busy at a different mode.
Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 7f65aa0c1cc5..3f9988849f32 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -204,9 +204,9 @@ #define ISDN_P_NT_UP0 0x06 #define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \ - (p == ISDN_P_TE_UP0)) + (p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE)) #define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \ - (p == ISDN_P_NT_UP0)) + (p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT)) #define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0)) #define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1)) #define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0)) @@ -333,6 +333,7 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 #define MISDN_CTRL_FILL_EMPTY 0x0200 +#define MISDN_CTRL_GETPEER 0x0400 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 -- cgit v1.2.3 From b36b654a7e82308cea063cdf909a7f246105c2a3 Mon Sep 17 00:00:00 2001 From: Matthias Urlichs Date: Sat, 16 Aug 2008 00:09:24 +0200 Subject: mISDN: Create /sys/class/mISDN Create /sys/class/mISDN and implement functions to handle device renames. 
Signed-Off-By: Matthias Urlichs Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 3f9988849f32..d4229aebf648 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -531,7 +531,8 @@ _queue_data(struct mISDNchannel *ch, u_int prim, /* global register/unregister functions */ -extern int mISDN_register_device(struct mISDNdevice *, char *name); +extern int mISDN_register_device(struct mISDNdevice *, + struct device *parent, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); @@ -539,6 +540,11 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); +static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) +{ + return dev_get_drvdata(dev); +} + extern void set_channel_address(struct mISDNchannel *, u_int, u_int); extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); extern unsigned short mISDN_clock_get(void); -- cgit v1.2.3 From 3f75e84a6a697c5cffb78ee15e79498a35473e05 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Tue, 4 Nov 2008 14:11:22 +0100 Subject: mISDN: Add layer1 prim MPH_INFORMATION_REQ MPH_INFORMATION provides full D- and B-Channel status overview - new layer1 primitive: MPH_INFORMATION_REQ - layer1 replies with MPH_INFORMATION_IND containing - dch->[state,Flags,nrbchan] - bch[]->[protocol,Flags] - hardware driver should send MPH_INFORMATION_IND on all ph state changes and BChannel state changes to MISDN_ID_ANY Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h
b/include/linux/mISDNif.h index d4229aebf648..557477ac3d5b 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -289,6 +289,23 @@ struct mISDN_devrename { char name[MISDN_MAX_IDLEN]; /* new name */ }; +/* MPH_INFORMATION_REQ payload */ +struct ph_info_ch { + __u32 protocol; + __u64 Flags; +}; + +struct ph_info_dch { + struct ph_info_ch ch; + __u16 state; + __u16 num_bch; +}; + +struct ph_info { + struct ph_info_dch dch; + struct ph_info_ch bch[]; +}; + /* timer device ioctl */ #define IMADDTIMER _IOR('I', 64, int) #define IMDELTIMER _IOR('I', 65, int) -- cgit v1.2.3 From 4db8e282f2d1dfa43d51ce2a4817901312c9134d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:32:46 -0800 Subject: Revert "driver core: move knode_bus into private structure" This reverts commit b9daa99ee533578e3f88231e7a16784dcb44ec42. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7d9da4b4993f..8987f4776064 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_bus; struct device *parent; struct device_private *p; -- cgit v1.2.3 From cda5e83fdea476dce9c0a9b1152cd6ca46832cc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:44:18 -0800 Subject: Revert "driver core: move knode_driver into private structure" This reverts commit 93e746db183b3bdbbda67900f79b5835f9cb388f. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. 
Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 8987f4776064..c66ceb15acd8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,7 @@ struct device_dma_parameters { }; struct device { + struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.3 From 2526c151c31358aec66b63921dd712bbec5ee0cb Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Fri, 9 Jan 2009 15:49:06 -0700 Subject: drivers/of: Add the of_find_i2c_device_by_node function. The of_find_i2c_device_by_node function allows you to follow a reference in the device tree to an i2c device node and then locate the linux device instantiated by the device tree. Example use: an I2S bus driver finding the i2c_device instance for a codec described by a device tree node. This was waiting for Anton's i2c patches that were just added. Signed-off-by: Jon Smirl Signed-off-by: Grant Likely --- include/linux/of_i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h index bd2a870ec296..34974b5a76f7 100644 --- a/include/linux/of_i2c.h +++ b/include/linux/of_i2c.h @@ -17,4 +17,7 @@ void of_register_i2c_devices(struct i2c_adapter *adap, struct device_node *adap_node); +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); + #endif /* __LINUX_OF_I2C_H */ -- cgit v1.2.3 From e2d4077678c7ec7661003c268120582adc544897 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 14:55:37 -0800 Subject: Revert "driver core: move klist_children into private structure" This reverts commit 11c3b5c3e08f4d855cbef52883c266b9ab9df879. Turns out that device_initialize shouldn't fail silently. 
This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c66ceb15acd8..2975351635d3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -366,6 +366,8 @@ struct device_dma_parameters { }; struct device { + struct klist klist_children; + struct klist_node knode_parent; /* node in sibling list */ struct klist_node knode_driver; struct klist_node knode_bus; struct device *parent; -- cgit v1.2.3 From 926beadb3dfaddccb3348a5b9e6c2a1f8290a220 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 Jan 2009 15:06:12 -0800 Subject: Revert "driver core: create a private portion of struct device" This reverts commit 2831fe6f9cc4e16c103504ee09a47a084297c0f3. Turns out that device_initialize shouldn't fail silently. This series needs to be reworked in order to get into proper shape. Reported-by: Stefan Richter Cc: Alan Cox Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 2975351635d3..45e5b1921fbb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -28,7 +28,6 @@ #define BUS_ID_SIZE 20 struct device; -struct device_private; struct device_driver; struct driver_private; struct class; @@ -372,8 +371,6 @@ struct device { struct klist_node knode_bus; struct device *parent; - struct device_private *p; - struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; -- cgit v1.2.3 From 85c210edc46d602a1562aeea0fc74919349c8cf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 16:40:53 -0800 Subject: compiler-gcc.h: add more comments to RELOC_HIDE Requested by C. 
Lameter Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Mike Travis Cc: Ingo Molnar Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index af40f8eb86f0..1514d534deeb 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,9 +11,19 @@ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") -/* This macro obfuscates arithmetic on a variable address so that gcc - shouldn't recognize the original var, and make assumptions about it */ /* + * This macro obfuscates arithmetic on a variable address so that gcc + * shouldn't recognize the original var, and make assumptions about it. + * + * This is needed because the C standard makes it undefined to do + * pointer arithmetic on "objects" outside their boundaries and the + * gcc optimizers assume this is the case. In particular they + * assume such arithmetic does not wrap. + * + * A miscompilation has been observed because of this on PPC. + * To work around it we hide the relationship of the pointer and the object + * using this macro. + * * Versions of the ppc64 compiler before 4.1 had a bug where use of * RELOC_HIDE could trash r30. 
The bug can be worked around by changing * the inline assembly constraint from =g to =r, in this particular -- cgit v1.2.3 From 69347a236b22c3962ea812511495e502dedfd50c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 9 Jan 2009 16:40:56 -0800 Subject: memstick: annotate endianness of attribute structs The code was shifting the endianness appropriately everywhere, annotate the structs to avoid the sparse warnings when assigning the endian types to the struct members, or passing them to be[16|32]_to_cpu: drivers/memstick/core/mspro_block.c:331:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:333:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:335:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:337:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:341:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:347:4: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:356:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:358:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:364:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:367:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:369:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:371:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:377:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:478:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:480:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:482:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:484:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:486:4: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:689:22: expected unsigned int [unsigned] 
[assigned] data_address drivers/memstick/core/mspro_block.c:689:22: got restricted __be32 [usertype] drivers/memstick/core/mspro_block.c:697:3: warning: cast to restricted __be32 drivers/memstick/core/mspro_block.c:960:17: warning: incorrect type in initializer (different base types) drivers/memstick/core/mspro_block.c:960:17: expected unsigned short [unsigned] data_count drivers/memstick/core/mspro_block.c:960:17: got restricted __be16 [usertype] drivers/memstick/core/mspro_block.c:993:6: warning: cast to restricted __be16 drivers/memstick/core/mspro_block.c:995:28: warning: cast to restricted __be16 Signed-off-by: Harvey Harrison Cc: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memstick.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memstick.h b/include/linux/memstick.h index d0c37e682234..690c35a9d4cc 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -100,8 +100,8 @@ struct mspro_param_register { #define MEMSTICK_SYS_PAR8 0x40 #define MEMSTICK_SYS_SERIAL 0x80 - unsigned short data_count; - unsigned int data_address; + __be16 data_count; + __be32 data_address; unsigned char tpc_param; } __attribute__((packed)); -- cgit v1.2.3 From c4be0c1dc4cdc37b175579be1460f15ac6495e9a Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:58 -0800 Subject: filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many cases, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem.
So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operations to freeze_fs and unfreeze_fs to avoid confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep the current behavior. Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b87b29f4797..3e59182de9df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1377,8 +1377,8 @@ struct super_operations { void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); -- cgit v1.2.3 From
fcccf502540e3d752d33b2d8e976034dee81f9f7 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Fri, 9 Jan 2009 16:40:59 -0800 Subject: filesystem freeze: implement generic freeze feature The ioctls for the generic freeze feature are below. o Freeze the filesystem int ioctl(int fd, int FIFREEZE, arg) fd: The file descriptor of the mountpoint FIFREEZE: request code for the freeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 o Unfreeze the filesystem int ioctl(int fd, int FITHAW, arg) fd: The file descriptor of the mountpoint FITHAW: request code for unfreeze arg: Ignored Return value: 0 if the operation succeeds. Otherwise, -1 Error number: If the filesystem has already been unfrozen, errno is set to EINVAL. [akpm@linux-foundation.org: fix CONFIG_BLOCK=n] Signed-off-by: Takashi Sato Signed-off-by: Masayuki Hamaguchi Cc: Cc: Cc: Christoph Hellwig Cc: Dave Kleikamp Cc: Dave Chinner Cc: Alasdair G Kergon Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 11 ++++++++++- include/linux/fs.h | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8605f8a74df9..bd7ac793be19 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -171,7 +171,7 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); int fsync_bdev(struct block_device *); struct super_block *freeze_bdev(struct block_device *); -void thaw_bdev(struct block_device *, struct super_block *); +int thaw_bdev(struct block_device *, struct super_block *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, @@ -346,6 +346,15 @@ static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space 
*mapping) { return 0; } static inline void invalidate_bdev(struct block_device *bdev) {} +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e59182de9df..6022f44043f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -234,6 +234,8 @@ struct inodes_stat_t { #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) @@ -591,6 +593,11 @@ struct block_device { * care to not mess up bd_private for that case. */ unsigned long bd_private; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; }; /* -- cgit v1.2.3 From f4b477c47332367d35686bd2b808c2156b96d7c7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 10 Jan 2009 11:12:09 +0000 Subject: rbtree: add const qualifier to some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'rb_first()', 'rb_last()', 'rb_next()' and 'rb_prev()' calls take a pointer to an RB node or RB root. They do not change the pointed objects, so add a 'const' qualifier in order to make life of the users of these functions easier. 
Indeed, if I have my own constant pointer 'const struct my_type *p', and I call 'rb_next(&p->rb)', I get a GCC warning: warning: passing argument 1 of ‘rb_next’ discards qualifiers from pointer target type Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 344bc3495ddb..9c295411d01f 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -140,10 +140,10 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(struct rb_node *); -extern struct rb_node *rb_prev(struct rb_node *); -extern struct rb_node *rb_first(struct rb_root *); -extern struct rb_node *rb_last(struct rb_root *); +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -- cgit v1.2.3 From 886ad09fc83342aa1c5a02a0b6d3298b78a8067f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 9 Jan 2009 15:54:07 -0800 Subject: libata: Add a per-host flag to opt-in into parallel port probes This patch adds a per host flag that allows drivers to opt in to having their busses scanned in parallel. Drivers that do not set this flag get their ports scanned in the "original" sequence.
Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 4f7c8fb4d3fe..b6b8a7f3ec66 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -239,6 +239,7 @@ enum { /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ ATA_HOST_STARTED = (1 << 1), /* Host started */ + ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ /* bits 24:31 of host->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From f52046b14b1e1a8a02ae48d0c69d39c5e204644f Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:01 +0100 Subject: mfd: PCF50633 core driver This patch implements the core of the PCF50633 driver. This core driver has generic register read/write functions and does interrupt management for its sub devices. Signed-off-by: Balaji Rao Cc: Andy Green Signed-off-by: Samuel Ortiz --- include/linux/mfd/pcf50633/core.h | 218 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 include/linux/mfd/pcf50633/core.h (limited to 'include/linux') diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h new file mode 100644 index 000000000000..4455b212d75a --- /dev/null +++ b/include/linux/mfd/pcf50633/core.h @@ -0,0 +1,218 @@ +/* + * core.h -- Core driver for NXP PCF50633 + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#ifndef __LINUX_MFD_PCF50633_CORE_H +#define __LINUX_MFD_PCF50633_CORE_H + +#include +#include +#include +#include +#include + +struct pcf50633; + +#define PCF50633_NUM_REGULATORS 11 + +struct pcf50633_platform_data { + struct regulator_init_data reg_init_data[PCF50633_NUM_REGULATORS]; + + char **batteries; + int num_batteries; + + /* Callbacks */ + void (*probe_done)(struct pcf50633 *); + void (*mbc_event_callback)(struct pcf50633 *, int); + void (*regulator_registered)(struct pcf50633 *, int); + void (*force_shutdown)(struct pcf50633 *); + + u8 resumers[5]; +}; + +struct pcf50633_subdev_pdata { + struct pcf50633 *pcf; +}; + +struct pcf50633_irq { + void (*handler) (int, void *); + void *data; +}; + +int pcf50633_register_irq(struct pcf50633 *pcf, int irq, + void (*handler) (int, void *), void *data); +int pcf50633_free_irq(struct pcf50633 *pcf, int irq); + +int pcf50633_irq_mask(struct pcf50633 *pcf, int irq); +int pcf50633_irq_unmask(struct pcf50633 *pcf, int irq); +int pcf50633_irq_mask_get(struct pcf50633 *pcf, int irq); + +int pcf50633_read_block(struct pcf50633 *, u8 reg, + int nr_regs, u8 *data); +int pcf50633_write_block(struct pcf50633 *pcf, u8 reg, + int nr_regs, u8 *data); +u8 pcf50633_reg_read(struct pcf50633 *, u8 reg); +int pcf50633_reg_write(struct pcf50633 *pcf, u8 reg, u8 val); + +int pcf50633_reg_set_bit_mask(struct pcf50633 *pcf, u8 reg, u8 mask, u8 val); +int pcf50633_reg_clear_bits(struct pcf50633 *pcf, u8 reg, u8 bits); + +/* Interrupt registers */ + +#define PCF50633_REG_INT1 0x02 +#define PCF50633_REG_INT2 0x03 +#define PCF50633_REG_INT3 0x04 +#define PCF50633_REG_INT4 0x05 +#define PCF50633_REG_INT5 0x06 + +#define PCF50633_REG_INT1M 0x07 +#define PCF50633_REG_INT2M 0x08 +#define PCF50633_REG_INT3M 0x09 +#define PCF50633_REG_INT4M 0x0a +#define PCF50633_REG_INT5M 0x0b + +enum { + /* Chip IRQs */ + PCF50633_IRQ_ADPINS, + PCF50633_IRQ_ADPREM, + PCF50633_IRQ_USBINS, + PCF50633_IRQ_USBREM, + PCF50633_IRQ_RESERVED1, + 
PCF50633_IRQ_RESERVED2, + PCF50633_IRQ_ALARM, + PCF50633_IRQ_SECOND, + PCF50633_IRQ_ONKEYR, + PCF50633_IRQ_ONKEYF, + PCF50633_IRQ_EXTON1R, + PCF50633_IRQ_EXTON1F, + PCF50633_IRQ_EXTON2R, + PCF50633_IRQ_EXTON2F, + PCF50633_IRQ_EXTON3R, + PCF50633_IRQ_EXTON3F, + PCF50633_IRQ_BATFULL, + PCF50633_IRQ_CHGHALT, + PCF50633_IRQ_THLIMON, + PCF50633_IRQ_THLIMOFF, + PCF50633_IRQ_USBLIMON, + PCF50633_IRQ_USBLIMOFF, + PCF50633_IRQ_ADCRDY, + PCF50633_IRQ_ONKEY1S, + PCF50633_IRQ_LOWSYS, + PCF50633_IRQ_LOWBAT, + PCF50633_IRQ_HIGHTMP, + PCF50633_IRQ_AUTOPWRFAIL, + PCF50633_IRQ_DWN1PWRFAIL, + PCF50633_IRQ_DWN2PWRFAIL, + PCF50633_IRQ_LEDPWRFAIL, + PCF50633_IRQ_LEDOVP, + PCF50633_IRQ_LDO1PWRFAIL, + PCF50633_IRQ_LDO2PWRFAIL, + PCF50633_IRQ_LDO3PWRFAIL, + PCF50633_IRQ_LDO4PWRFAIL, + PCF50633_IRQ_LDO5PWRFAIL, + PCF50633_IRQ_LDO6PWRFAIL, + PCF50633_IRQ_HCLDOPWRFAIL, + PCF50633_IRQ_HCLDOOVL, + + /* Always last */ + PCF50633_NUM_IRQ, +}; + +struct pcf50633 { + struct device *dev; + struct i2c_client *i2c_client; + + struct pcf50633_platform_data *pdata; + int irq; + struct pcf50633_irq irq_handler[PCF50633_NUM_IRQ]; + struct work_struct irq_work; + struct mutex lock; + + u8 mask_regs[5]; + + u8 suspend_irq_masks[5]; + u8 resume_reason[5]; + int is_suspended; + + int onkey1s_held; + + struct platform_device *rtc_pdev; + struct platform_device *mbc_pdev; + struct platform_device *adc_pdev; + struct platform_device *input_pdev; + struct platform_device *regulator_pdev[PCF50633_NUM_REGULATORS]; +}; + +enum pcf50633_reg_int1 { + PCF50633_INT1_ADPINS = 0x01, /* Adapter inserted */ + PCF50633_INT1_ADPREM = 0x02, /* Adapter removed */ + PCF50633_INT1_USBINS = 0x04, /* USB inserted */ + PCF50633_INT1_USBREM = 0x08, /* USB removed */ + /* reserved */ + PCF50633_INT1_ALARM = 0x40, /* RTC alarm time is reached */ + PCF50633_INT1_SECOND = 0x80, /* RTC periodic second interrupt */ +}; + +enum pcf50633_reg_int2 { + PCF50633_INT2_ONKEYR = 0x01, /* ONKEY rising edge */ + PCF50633_INT2_ONKEYF = 0x02, /* 
ONKEY falling edge */ + PCF50633_INT2_EXTON1R = 0x04, /* EXTON1 rising edge */ + PCF50633_INT2_EXTON1F = 0x08, /* EXTON1 falling edge */ + PCF50633_INT2_EXTON2R = 0x10, /* EXTON2 rising edge */ + PCF50633_INT2_EXTON2F = 0x20, /* EXTON2 falling edge */ + PCF50633_INT2_EXTON3R = 0x40, /* EXTON3 rising edge */ + PCF50633_INT2_EXTON3F = 0x80, /* EXTON3 falling edge */ +}; + +enum pcf50633_reg_int3 { + PCF50633_INT3_BATFULL = 0x01, /* Battery full */ + PCF50633_INT3_CHGHALT = 0x02, /* Charger halt */ + PCF50633_INT3_THLIMON = 0x04, + PCF50633_INT3_THLIMOFF = 0x08, + PCF50633_INT3_USBLIMON = 0x10, + PCF50633_INT3_USBLIMOFF = 0x20, + PCF50633_INT3_ADCRDY = 0x40, /* ADC result ready */ + PCF50633_INT3_ONKEY1S = 0x80, /* ONKEY pressed 1 second */ +}; + +enum pcf50633_reg_int4 { + PCF50633_INT4_LOWSYS = 0x01, + PCF50633_INT4_LOWBAT = 0x02, + PCF50633_INT4_HIGHTMP = 0x04, + PCF50633_INT4_AUTOPWRFAIL = 0x08, + PCF50633_INT4_DWN1PWRFAIL = 0x10, + PCF50633_INT4_DWN2PWRFAIL = 0x20, + PCF50633_INT4_LEDPWRFAIL = 0x40, + PCF50633_INT4_LEDOVP = 0x80, +}; + +enum pcf50633_reg_int5 { + PCF50633_INT5_LDO1PWRFAIL = 0x01, + PCF50633_INT5_LDO2PWRFAIL = 0x02, + PCF50633_INT5_LDO3PWRFAIL = 0x04, + PCF50633_INT5_LDO4PWRFAIL = 0x08, + PCF50633_INT5_LDO5PWRFAIL = 0x10, + PCF50633_INT5_LDO6PWRFAIL = 0x20, + PCF50633_INT5_HCLDOPWRFAIL = 0x40, + PCF50633_INT5_HCLDOOVL = 0x80, +}; + +/* misc. registers */ +#define PCF50633_REG_OOCSHDWN 0x0c + +/* LED registers */ +#define PCF50633_REG_LEDOUT 0x28 +#define PCF50633_REG_LEDENA 0x29 +#define PCF50633_REG_LEDCTL 0x2a +#define PCF50633_REG_LEDDIM 0x2b + +#endif + -- cgit v1.2.3 From 08c3e06a5eb27d43b712adef18379f8464425e71 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:26 +0100 Subject: mfd: PCF50633 adc driver This patch adds basic support for the PCF50633 ADC. The subtractive mode is not supported yet. Since we don't have adc subsystem, it currently lives in drivers/mfd. 
Signed-off-by: Balaji Rao Cc: Andy Green Acked-by: Jonathan Cameron Signed-off-by: Samuel Ortiz --- include/linux/mfd/pcf50633/adc.h | 72 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/linux/mfd/pcf50633/adc.h (limited to 'include/linux') diff --git a/include/linux/mfd/pcf50633/adc.h b/include/linux/mfd/pcf50633/adc.h new file mode 100644 index 000000000000..56669b4183ad --- /dev/null +++ b/include/linux/mfd/pcf50633/adc.h @@ -0,0 +1,72 @@ +/* + * adc.h -- Driver for NXP PCF50633 ADC + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_ADC_H +#define __LINUX_MFD_PCF50633_ADC_H + +#include +#include + +/* ADC Registers */ +#define PCF50633_REG_ADCC3 0x52 +#define PCF50633_REG_ADCC2 0x53 +#define PCF50633_REG_ADCC1 0x54 +#define PCF50633_REG_ADCS1 0x55 +#define PCF50633_REG_ADCS2 0x56 +#define PCF50633_REG_ADCS3 0x57 + +#define PCF50633_ADCC1_ADCSTART 0x01 +#define PCF50633_ADCC1_RES_10BIT 0x02 +#define PCF50633_ADCC1_AVERAGE_NO 0x00 +#define PCF50633_ADCC1_AVERAGE_4 0x04 +#define PCF50633_ADCC1_AVERAGE_8 0x08 +#define PCF50633_ADCC1_AVERAGE_16 0x0c +#define PCF50633_ADCC1_MUX_BATSNS_RES 0x00 +#define PCF50633_ADCC1_MUX_BATSNS_SUBTR 0x10 +#define PCF50633_ADCC1_MUX_ADCIN2_RES 0x20 +#define PCF50633_ADCC1_MUX_ADCIN2_SUBTR 0x30 +#define PCF50633_ADCC1_MUX_BATTEMP 0x60 +#define PCF50633_ADCC1_MUX_ADCIN1 0x70 +#define PCF50633_ADCC1_AVERAGE_MASK 0x0c +#define PCF50633_ADCC1_ADCMUX_MASK 0xf0 + +#define PCF50633_ADCC2_RATIO_NONE 0x00 +#define PCF50633_ADCC2_RATIO_BATTEMP 0x01 +#define PCF50633_ADCC2_RATIO_ADCIN1 0x02 +#define PCF50633_ADCC2_RATIO_BOTH 0x03 +#define PCF50633_ADCC2_RATIOSETTL_100US 0x04 + +#define 
PCF50633_ADCC3_ACCSW_EN 0x01 +#define PCF50633_ADCC3_NTCSW_EN 0x04 +#define PCF50633_ADCC3_RES_DIV_TWO 0x10 +#define PCF50633_ADCC3_RES_DIV_THREE 0x00 + +#define PCF50633_ADCS3_REF_NTCSW 0x00 +#define PCF50633_ADCS3_REF_ACCSW 0x10 +#define PCF50633_ADCS3_REF_2V0 0x20 +#define PCF50633_ADCS3_REF_VISA 0x30 +#define PCF50633_ADCS3_REF_2V0_2 0x70 +#define PCF50633_ADCS3_ADCRDY 0x80 + +#define PCF50633_ADCS3_ADCDAT1L_MASK 0x03 +#define PCF50633_ADCS3_ADCDAT2L_MASK 0x0c +#define PCF50633_ADCS3_ADCDAT2L_SHIFT 2 +#define PCF50633_ASCS3_REF_MASK 0x70 + +extern int +pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg, + void (*callback)(struct pcf50633 *, void *, int), + void *callback_param); +extern int +pcf50633_adc_sync_read(struct pcf50633 *pcf, int mux, int avg); + +#endif /* __LINUX_PCF50633_ADC_H */ -- cgit v1.2.3 From 6a3d119b4ce29cf32bfe91eb61d46e9dbd8ce38a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:37 +0100 Subject: mfd: PCF50633 gpio support What the PCF50633 calls a 'GPIO' is much more than the GPIO in the linux sense and there are only 4 of them - which means, the gpiolib is not used here. Signed-off-by: Balaji Rao Cc: Andy Green Signed-off-by: Samuel Ortiz --- include/linux/mfd/pcf50633/gpio.h | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/linux/mfd/pcf50633/gpio.h (limited to 'include/linux') diff --git a/include/linux/mfd/pcf50633/gpio.h b/include/linux/mfd/pcf50633/gpio.h new file mode 100644 index 000000000000..a42b845efc54 --- /dev/null +++ b/include/linux/mfd/pcf50633/gpio.h @@ -0,0 +1,52 @@ +/* + * gpio.h -- GPIO driver for NXP PCF50633 + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version.
+ */ + +#ifndef __LINUX_MFD_PCF50633_GPIO_H +#define __LINUX_MFD_PCF50633_GPIO_H + +#include + +#define PCF50633_GPIO1 1 +#define PCF50633_GPIO2 2 +#define PCF50633_GPIO3 3 +#define PCF50633_GPO 4 + +#define PCF50633_REG_GPIO1CFG 0x14 +#define PCF50633_REG_GPIO2CFG 0x15 +#define PCF50633_REG_GPIO3CFG 0x16 +#define PCF50633_REG_GPOCFG 0x17 + +#define PCF50633_GPOCFG_GPOSEL_MASK 0x07 + +enum pcf50633_reg_gpocfg { + PCF50633_GPOCFG_GPOSEL_0 = 0x00, + PCF50633_GPOCFG_GPOSEL_LED_NFET = 0x01, + PCF50633_GPOCFG_GPOSEL_SYSxOK = 0x02, + PCF50633_GPOCFG_GPOSEL_CLK32K = 0x03, + PCF50633_GPOCFG_GPOSEL_ADAPUSB = 0x04, + PCF50633_GPOCFG_GPOSEL_USBxOK = 0x05, + PCF50633_GPOCFG_GPOSEL_ACTPH4 = 0x06, + PCF50633_GPOCFG_GPOSEL_1 = 0x07, + PCF50633_GPOCFG_GPOSEL_INVERSE = 0x08, +}; + +int pcf50633_gpio_set(struct pcf50633 *pcf, int gpio, u8 val); +u8 pcf50633_gpio_get(struct pcf50633 *pcf, int gpio); + +int pcf50633_gpio_invert_set(struct pcf50633 *, int gpio, int invert); +int pcf50633_gpio_invert_get(struct pcf50633 *pcf, int gpio); + +int pcf50633_gpio_power_supply_set(struct pcf50633 *, + int gpio, int regulator, int on); +#endif /* __LINUX_MFD_PCF50633_GPIO_H */ + + -- cgit v1.2.3 From f5714dc97d63cc0dd1219bd0eb2e1f8df1e4347a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:50:55 +0100 Subject: power_supply: PCF50633 battery charger driver Signed-off-by: Balaji Rao Cc: Andy Green Cc: David Woodhouse Acked-by: Anton Vorontsov Signed-off-by: Samuel Ortiz --- include/linux/mfd/pcf50633/mbc.h | 134 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 include/linux/mfd/pcf50633/mbc.h (limited to 'include/linux') diff --git a/include/linux/mfd/pcf50633/mbc.h b/include/linux/mfd/pcf50633/mbc.h new file mode 100644 index 000000000000..6e17619b773a --- /dev/null +++ b/include/linux/mfd/pcf50633/mbc.h @@ -0,0 +1,134 @@ +/* + * mbc.h -- Driver for NXP PCF50633 Main Battery Charger + * + * (C) 2006-2008 by Openmoko, Inc. 
+ * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_MBC_H +#define __LINUX_MFD_PCF50633_MBC_H + +#include +#include + +#define PCF50633_REG_MBCC1 0x43 +#define PCF50633_REG_MBCC2 0x44 +#define PCF50633_REG_MBCC3 0x45 +#define PCF50633_REG_MBCC4 0x46 +#define PCF50633_REG_MBCC5 0x47 +#define PCF50633_REG_MBCC6 0x48 +#define PCF50633_REG_MBCC7 0x49 +#define PCF50633_REG_MBCC8 0x4a +#define PCF50633_REG_MBCS1 0x4b +#define PCF50633_REG_MBCS2 0x4c +#define PCF50633_REG_MBCS3 0x4d + +enum pcf50633_reg_mbcc1 { + PCF50633_MBCC1_CHGENA = 0x01, /* Charger enable */ + PCF50633_MBCC1_AUTOSTOP = 0x02, + PCF50633_MBCC1_AUTORES = 0x04, /* automatic resume */ + PCF50633_MBCC1_RESUME = 0x08, /* explicit resume cmd */ + PCF50633_MBCC1_RESTART = 0x10, /* restart charging */ + PCF50633_MBCC1_PREWDTIME_60M = 0x20, /* max. 
precharging time */ + PCF50633_MBCC1_WDTIME_1H = 0x00, + PCF50633_MBCC1_WDTIME_2H = 0x40, + PCF50633_MBCC1_WDTIME_4H = 0x80, + PCF50633_MBCC1_WDTIME_6H = 0xc0, +}; +#define PCF50633_MBCC1_WDTIME_MASK 0xc0 + +enum pcf50633_reg_mbcc2 { + PCF50633_MBCC2_VBATCOND_2V7 = 0x00, + PCF50633_MBCC2_VBATCOND_2V85 = 0x01, + PCF50633_MBCC2_VBATCOND_3V0 = 0x02, + PCF50633_MBCC2_VBATCOND_3V15 = 0x03, + PCF50633_MBCC2_VMAX_4V = 0x00, + PCF50633_MBCC2_VMAX_4V20 = 0x28, + PCF50633_MBCC2_VRESDEBTIME_64S = 0x80, /* debounce time (32/64sec) */ +}; + +enum pcf50633_reg_mbcc7 { + PCF50633_MBCC7_USB_100mA = 0x00, + PCF50633_MBCC7_USB_500mA = 0x01, + PCF50633_MBCC7_USB_1000mA = 0x02, + PCF50633_MBCC7_USB_SUSPEND = 0x03, + PCF50633_MBCC7_BATTEMP_EN = 0x04, + PCF50633_MBCC7_BATSYSIMAX_1A6 = 0x00, + PCF50633_MBCC7_BATSYSIMAX_1A8 = 0x40, + PCF50633_MBCC7_BATSYSIMAX_2A0 = 0x80, + PCF50633_MBCC7_BATSYSIMAX_2A2 = 0xc0, +}; +#define PCF50633_MBCC7_USB_MASK 0x03 + +enum pcf50633_reg_mbcc8 { + PCF50633_MBCC8_USBENASUS = 0x10, +}; + +enum pcf50633_reg_mbcs1 { + PCF50633_MBCS1_USBPRES = 0x01, + PCF50633_MBCS1_USBOK = 0x02, + PCF50633_MBCS1_ADAPTPRES = 0x04, + PCF50633_MBCS1_ADAPTOK = 0x08, + PCF50633_MBCS1_TBAT_OK = 0x00, + PCF50633_MBCS1_TBAT_ABOVE = 0x10, + PCF50633_MBCS1_TBAT_BELOW = 0x20, + PCF50633_MBCS1_TBAT_UNDEF = 0x30, + PCF50633_MBCS1_PREWDTEXP = 0x40, + PCF50633_MBCS1_WDTEXP = 0x80, +}; + +enum pcf50633_reg_mbcs2_mbcmod { + PCF50633_MBCS2_MBC_PLAY = 0x00, + PCF50633_MBCS2_MBC_USB_PRE = 0x01, + PCF50633_MBCS2_MBC_USB_PRE_WAIT = 0x02, + PCF50633_MBCS2_MBC_USB_FAST = 0x03, + PCF50633_MBCS2_MBC_USB_FAST_WAIT = 0x04, + PCF50633_MBCS2_MBC_USB_SUSPEND = 0x05, + PCF50633_MBCS2_MBC_ADP_PRE = 0x06, + PCF50633_MBCS2_MBC_ADP_PRE_WAIT = 0x07, + PCF50633_MBCS2_MBC_ADP_FAST = 0x08, + PCF50633_MBCS2_MBC_ADP_FAST_WAIT = 0x09, + PCF50633_MBCS2_MBC_BAT_FULL = 0x0a, + PCF50633_MBCS2_MBC_HALT = 0x0b, +}; +#define PCF50633_MBCS2_MBC_MASK 0x0f +enum pcf50633_reg_mbcs2_chgstat { + PCF50633_MBCS2_CHGS_NONE = 0x00, + 
PCF50633_MBCS2_CHGS_ADAPTER = 0x10, + PCF50633_MBCS2_CHGS_USB = 0x20, + PCF50633_MBCS2_CHGS_BOTH = 0x30, +}; +#define PCF50633_MBCS2_RESSTAT_AUTO 0x40 + +enum pcf50633_reg_mbcs3 { + PCF50633_MBCS3_USBLIM_PLAY = 0x01, + PCF50633_MBCS3_USBLIM_CGH = 0x02, + PCF50633_MBCS3_TLIM_PLAY = 0x04, + PCF50633_MBCS3_TLIM_CHG = 0x08, + PCF50633_MBCS3_ILIM = 0x10, /* 1: Ibat > Icutoff */ + PCF50633_MBCS3_VLIM = 0x20, /* 1: Vbat == Vmax */ + PCF50633_MBCS3_VBATSTAT = 0x40, /* 1: Vbat > Vbatcond */ + PCF50633_MBCS3_VRES = 0x80, /* 1: Vbat > Vth(RES) */ +}; + +#define PCF50633_MBCC2_VBATCOND_MASK 0x03 +#define PCF50633_MBCC2_VMAX_MASK 0x3c + +/* Charger status */ +#define PCF50633_MBC_USB_ONLINE 0x01 +#define PCF50633_MBC_USB_ACTIVE 0x02 +#define PCF50633_MBC_ADAPTER_ONLINE 0x04 +#define PCF50633_MBC_ADAPTER_ACTIVE 0x08 + +int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma); + +int pcf50633_mbc_get_status(struct pcf50633 *); +void pcf50633_mbc_set_status(struct pcf50633 *, int what, int status); + +#endif + -- cgit v1.2.3 From 5ec271e745350c7df6a6ebca24b43cb7a10bfa4a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:51:01 +0100 Subject: regulator: PCF50633 pmic driver Changes from V1: - Removed support for suspend_enable & suspend_disable functions. 
Signed-off-by: Balaji Rao Cc: Andy Green Cc: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/pcf50633/pmic.h | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 include/linux/mfd/pcf50633/pmic.h (limited to 'include/linux') diff --git a/include/linux/mfd/pcf50633/pmic.h b/include/linux/mfd/pcf50633/pmic.h new file mode 100644 index 000000000000..2d3dbe53b235 --- /dev/null +++ b/include/linux/mfd/pcf50633/pmic.h @@ -0,0 +1,67 @@ +#ifndef __LINUX_MFD_PCF50633_PMIC_H +#define __LINUX_MFD_PCF50633_PMIC_H + +#include +#include + +#define PCF50633_REG_AUTOOUT 0x1a +#define PCF50633_REG_AUTOENA 0x1b +#define PCF50633_REG_AUTOCTL 0x1c +#define PCF50633_REG_AUTOMXC 0x1d +#define PCF50633_REG_DOWN1OUT 0x1e +#define PCF50633_REG_DOWN1ENA 0x1f +#define PCF50633_REG_DOWN1CTL 0x20 +#define PCF50633_REG_DOWN1MXC 0x21 +#define PCF50633_REG_DOWN2OUT 0x22 +#define PCF50633_REG_DOWN2ENA 0x23 +#define PCF50633_REG_DOWN2CTL 0x24 +#define PCF50633_REG_DOWN2MXC 0x25 +#define PCF50633_REG_MEMLDOOUT 0x26 +#define PCF50633_REG_MEMLDOENA 0x27 +#define PCF50633_REG_LDO1OUT 0x2d +#define PCF50633_REG_LDO1ENA 0x2e +#define PCF50633_REG_LDO2OUT 0x2f +#define PCF50633_REG_LDO2ENA 0x30 +#define PCF50633_REG_LDO3OUT 0x31 +#define PCF50633_REG_LDO3ENA 0x32 +#define PCF50633_REG_LDO4OUT 0x33 +#define PCF50633_REG_LDO4ENA 0x34 +#define PCF50633_REG_LDO5OUT 0x35 +#define PCF50633_REG_LDO5ENA 0x36 +#define PCF50633_REG_LDO6OUT 0x37 +#define PCF50633_REG_LDO6ENA 0x38 +#define PCF50633_REG_HCLDOOUT 0x39 +#define PCF50633_REG_HCLDOENA 0x3a +#define PCF50633_REG_HCLDOOVL 0x40 + +enum pcf50633_regulator_enable { + PCF50633_REGULATOR_ON = 0x01, + PCF50633_REGULATOR_ON_GPIO1 = 0x02, + PCF50633_REGULATOR_ON_GPIO2 = 0x04, + PCF50633_REGULATOR_ON_GPIO3 = 0x08, +}; +#define PCF50633_REGULATOR_ON_MASK 0x0f + +enum pcf50633_regulator_phase { + PCF50633_REGULATOR_ACTPH1 = 0x00, + PCF50633_REGULATOR_ACTPH2 = 0x10, + 
PCF50633_REGULATOR_ACTPH3 = 0x20, + PCF50633_REGULATOR_ACTPH4 = 0x30, +}; +#define PCF50633_REGULATOR_ACTPH_MASK 0x30 + +enum pcf50633_regulator_id { + PCF50633_REGULATOR_AUTO, + PCF50633_REGULATOR_DOWN1, + PCF50633_REGULATOR_DOWN2, + PCF50633_REGULATOR_LDO1, + PCF50633_REGULATOR_LDO2, + PCF50633_REGULATOR_LDO3, + PCF50633_REGULATOR_LDO4, + PCF50633_REGULATOR_LDO5, + PCF50633_REGULATOR_LDO6, + PCF50633_REGULATOR_HCLDO, + PCF50633_REGULATOR_MEMLDO, +}; +#endif + -- cgit v1.2.3 From 53ce3d9564908794ae7dd32969089b57df5fc098 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Jan 2009 12:27:08 -0800 Subject: smp_call_function_single(): be slightly less stupid If you do smp_call_function_single(expression-with-side-effects, ...) then expression-with-side-effects never gets evaluated on UP builds. As always, implementing it in C is the correct thing to do. While we're there, uninline it for size and possible header dependency reasons. And create a new kernel/up.c, as a place in which to put uniprocessor-specific code and storage. It should mirror kernel/smp.c. 
Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/smp.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index b82466968101..715196b09d67 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -24,6 +24,9 @@ struct call_single_data { /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int wait); + #ifdef CONFIG_SMP #include @@ -79,8 +82,6 @@ smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, return 0; } -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); /* @@ -140,14 +141,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_single(cpuid, func, info, wait) \ -({ \ - WARN_ON(cpuid != 0); \ - local_irq_disable(); \ - (func)(info); \ - local_irq_enable(); \ - 0; \ -}) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) #define smp_call_function_many(mask, func, info, wait) \ -- cgit v1.2.3 From 649274d993212e7c23c0cb734572c2311c200872 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 11 Jan 2009 00:20:39 -0800 Subject: net_dma: acquire/release dma channels on ifup/ifdown The recent dmaengine rework removed the capability to remove dma device driver modules while net_dma is active. Rather than notify dmaengine-clients that channels are trying to be removed, we now rely on clients to notify dmaengine when they no longer have a need for channels. 
Teach net_dma to release channels by taking dmaengine references at netdevice open and dropping references at netdevice close. Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 64dea2ab326c..c73f1e2b59b7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -270,8 +270,18 @@ struct dma_device { /* --- public DMA engine API --- */ +#ifdef CONFIG_DMA_ENGINE void dmaengine_get(void); void dmaengine_put(void); +#else +static inline void dmaengine_get(void) +{ +} +static inline void dmaengine_put(void) +{ +} +#endif + dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, -- cgit v1.2.3 From 57de16e612d63138bd2c618449af9d8312466e25 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Sun, 26 Oct 2008 13:30:09 +0100 Subject: BUGFIX: used NULL pointer at ioctl(sk,IMGETDEVINFO,&devinfo) when devinfo.id not registered daxtar example # modprobe hfcsusb daxtar example # modprobe mISDN_l1loop daxtar example # ./misdnportinfo Found 3 devices id: 0 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: HFC-S_USB.1 id: 1 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: mISDN_l1loop.1 id: 2 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: mISDN_l1loop.2 daxtar example # rmmod hfcsusb daxtar example # ./misdnportinfo Found 2 devices *Segmentation* *fault* dmesg: [ 9914.939718] BUG: unable to handle kernel NULL pointer dereference at 000000d4 [ 9914.939721] IP: [] :mISDN_core:get_mdevice+0x19/0x22 [ 9914.939729] *pde = 00000000 [ 9914.939732] Oops: 0000 [#14] PREEMPT SMP [ 9914.939734] Modules linked in: mISDN_l1loop mISDN_core vmnet vmblock vmci vmmon coretemp w83627ehf hwmon_vid rfcomm 
l2cap blue tooth usbhid snd_usb_audio snd_usb_lib snd_rawmidi snd_hwdep fuse nvidia(P) uhci_hcd i2c_i801 ehci_hcd snd_hda_intel atl1 usbcore i2c_core parport_seria l [last unloaded: hfcsusb] [ 9914.939751] Pid: 29618, comm: misdnportinfo Tainted: P D (2.6.27.3 #5) [ 9914.939753] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 9914.939758] EIP is at get_mdevice+0x19/0x22 [mISDN_core] [ 9914.939760] EAX: 00000000 EBX: f8fa791c ECX: f6afaa58 EDX: f7960cf4 [ 9914.939762] ESI: 80044944 EDI: bfc2e62c EBP: bfc2e62c ESP: f5adbef4 [ 9914.939763] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 9914.939765] Process misdnportinfo (pid: 29618, ti=f5ada000 task=f6bec430 task.ti=f5ada000) [ 9914.939767] Stack: f8f9f4e0 00000000 f8f9f867 bfc2e62c 0000000a c02461e8 00200246 c042dde8 [ 9914.939771] 00000003 c042dde4 00000000 00000001 00200082 c0114775 00000000 00000000 [ 9914.939775] 00000003 f7088010 00200282 f8fa791c 80044944 bfc2e62c bfc2e62c c02f6615 [ 9914.939780] Call Trace: [ 9914.939782] [] _get_mdevice+0x0/0x18 [mISDN_core] [ 9914.939789] [] base_sock_ioctl+0x7a/0x129 [mISDN_core] [ 9914.939789] [] opost+0x171/0x182 [ 9914.939789] [] __wake_up+0x29/0x39 [ 9914.939789] [] sock_ioctl+0x1b5/0x1d9 [ 9914.939789] [] sock_ioctl+0x0/0x1d9 [ 9914.939789] [] vfs_ioctl+0x1c/0x5d [ 9914.939789] [] do_vfs_ioctl+0x23e/0x24e [ 9914.939789] [] sys_ioctl+0x2c/0x45 [ 9914.939789] [] sysenter_do_call+0x12/0x21 [ 9914.939789] [] pci_fixup_i450gx+0x4e/0x56 [ 9914.939789] ======================= [ 9914.939789] Code: 00 68 02 f0 f9 f8 e8 ae b4 2c c7 8b 44 24 04 5a 59 c3 83 ec 04 31 d2 89 04 24 89 e1 b8 ac df fa f8 68 e0 f4 f9 f8 e8 4a b5 2c c7 <8b> 80 d4 00 00 00 5a 59 c3 53 89 cb 8d 90 9c 00 00 00 89 c8 e8 [ 9914.939789] EIP: [] get_mdevice+0x19/0x22 [mISDN_core] SS:ESP 0068:f5adbef4 [ 9914.939858] ---[ end trace 50e18a715b019424 ]--- Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') 
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 557477ac3d5b..5da3d95b27f1 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -559,7 +559,10 @@ extern void mISDN_unregister_clock(struct mISDNclock *); static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) { - return dev_get_drvdata(dev); + if (dev) + return dev_get_drvdata(dev); + else + return NULL; } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -- cgit v1.2.3 From 2e4c77bea3d8b17d94f8ee382411f359b708560f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Dec 2008 14:16:41 +0100 Subject: m68k: dio - Kill warn_unused_result warnings warning: ignoring return value of 'device_register', declared with attribute warn_unused_result warning: ignoring return value of 'device_create_file', declared with attribute warn_unused_result Signed-off-by: Geert Uytterhoeven --- include/linux/dio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dio.h b/include/linux/dio.h index 1e65ebc2a3db..b2dd31ca1710 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -241,7 +241,7 @@ struct dio_driver { extern int dio_find(int deviceid); extern unsigned long dio_scodetophysaddr(int scode); -extern void dio_create_sysfs_dev_files(struct dio_dev *); +extern int dio_create_sysfs_dev_files(struct dio_dev *); /* New-style probing */ extern int dio_register_driver(struct dio_driver *); -- cgit v1.2.3 From 985ebdb5ed54151eba734aa1b307460e8e4267ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Mon, 12 Jan 2009 16:32:13 -0800 Subject: net: Fix a comment in include/linux/netdevice.h. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a comment in include/linux/netdevice.h. Signed-off-by: Krzysztof Hałasa Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f24556813375..4647604c7ca9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,7 +467,7 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * - * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); * Called when a packet needs to be transmitted. * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, * Required can not be NULL. -- cgit v1.2.3 From daaf83d2b9277928739f3eb7ea64f49c1254fd62 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 12 Jan 2009 00:06:11 +0000 Subject: netfilter 09/09: remove padding from struct xt_match on 64bit builds reorder struct xt_match to remove 8 bytes of padding and make its size 128 bytes. This saves a small amount of data space in each of the xt netfilter modules and fits xt_match in one 128 byte cache line. Signed-off-by: Richard Kennedy Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e52ce475d19f..c7ee8744d26b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -270,6 +270,7 @@ struct xt_match struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ @@ -302,7 +303,6 @@ struct xt_match unsigned short proto; unsigned short family; - u_int8_t revision; }; /* Registration hooks for targets. 
*/ -- cgit v1.2.3 From 4c696ba7982501d43dea11dbbaabd2aa8a19cc42 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:53 +0100 Subject: [CVE-2009-0029] Move compat system call declarations to compat header file Move declarations to correct header file. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 13 +++++++++++++ include/linux/syscalls.h | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index e88f3ecf38b4..3fd2194ff573 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -280,5 +280,18 @@ asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, asmlinkage long compat_sys_timerfd_gettime(int ufd, struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, + __u32 __user *pages, + const int __user *nodes, + int __user *status, + int flags); +asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, + struct compat_stat __user *statbuf, + int flag); +asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, + int flags, int mode); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18d0a243a7b3..a7593f670ca6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -530,11 +530,6 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags); -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, - __u32 __user *pages, - const int __user *nodes, - int __user *status, - int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, unsigned long __user *nmask, @@ -583,13 +578,6 @@ asmlinkage long 
sys_readlinkat(int dfd, const char __user *path, char __user *bu int bufsiz); asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, - struct compat_stat __user *statbuf, - int flag); -asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, - int flags, int mode); asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, -- cgit v1.2.3 From 2ed7c03ec17779afb4fcfa3b8c61df61bd4879ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:54 +0100 Subject: [CVE-2009-0029] Convert all system calls to return a long Convert all system calls to return a long. This should be a NOP since all converted types should have the same size anyway. With the exception of sys_exit_group which returned void. But that doesn't matter since the system call doesn't return. 
Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 79 ++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a7593f670ca6..22290eeaf553 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,7 +77,7 @@ asmlinkage long sys_times(struct tms __user *tbuf); asmlinkage long sys_gettid(void); asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); -asmlinkage unsigned long sys_alarm(unsigned int seconds); +asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); @@ -166,7 +166,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, unsigned long flags); asmlinkage long sys_exit(int error_code); -asmlinkage void sys_exit_group(int error_code); +asmlinkage long sys_exit_group(int error_code); asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, int options, struct rusage __user *ru); asmlinkage long sys_waitid(int which, pid_t pid, @@ -196,7 +196,7 @@ asmlinkage long sys_tkill(int pid, int sig); asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_sgetmask(void); asmlinkage long sys_ssetmask(int newmask); -asmlinkage unsigned long sys_signal(int sig, __sighandler_t handler); +asmlinkage long sys_signal(int sig, __sighandler_t handler); asmlinkage long sys_pause(void); asmlinkage long sys_sync(void); @@ -246,29 +246,29 @@ asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); -asmlinkage ssize_t sys_getxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage 
ssize_t sys_lgetxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size); +asmlinkage long sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_fgetxattr(int fd, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_listxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_llistxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); -asmlinkage unsigned long sys_brk(unsigned long brk); +asmlinkage long sys_brk(unsigned long brk); asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot); -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); +asmlinkage long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); @@ -321,10 +321,10 @@ asmlinkage long sys_io_submit(aio_context_t, long, struct 
iocb __user * __user *); asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, struct io_event __user *result); -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, - off_t __user *offset, size_t count); -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, - loff_t __user *offset, size_t count); +asmlinkage long sys_sendfile(int out_fd, int in_fd, + off_t __user *offset, size_t count); +asmlinkage long sys_sendfile64(int out_fd, int in_fd, + loff_t __user *offset, size_t count); asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_creat(const char __user *pathname, int mode); @@ -368,26 +368,25 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, - unsigned int origin); +asmlinkage long sys_lseek(unsigned int fd, off_t offset, + unsigned int origin); asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int origin); -asmlinkage ssize_t sys_read(unsigned int fd, char __user *buf, - size_t count); -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); -asmlinkage ssize_t sys_readv(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_write(unsigned int fd, const char __user *buf, - size_t count); -asmlinkage ssize_t sys_writev(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos); -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, - size_t count, loff_t pos); +asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); +asmlinkage long sys_readv(unsigned long fd, 
+ const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_write(unsigned int fd, const char __user *buf, + size_t count); +asmlinkage long sys_writev(unsigned long fd, + const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_pread64(unsigned int fd, char __user *buf, + size_t count, loff_t pos); +asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); @@ -476,7 +475,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); -asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); -- cgit v1.2.3 From e55380edf68796d75bf41391a781c68ee678587d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:55 +0100 Subject: [CVE-2009-0029] Rename old_readdir to sys_old_readdir This way it matches the generic system call name convention. 
Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22290eeaf553..ca079c3d09e3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; +struct old_linux_dirent; #include #include @@ -608,6 +609,7 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); +asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3 From 1a94bc34768e463a93cb3751819709ab0ea80a01 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:59 +0100 Subject: [CVE-2009-0029] System call wrapper infrastructure From: Martin Schwidefsky By selecting HAVE_SYSCALL_WRAPPERS architectures can activate system call wrappers in order to sign extend system call arguments. All architectures where the ABI defines that the caller of a function has to perform sign extension probably need this. Reported-by: Christian Borntraeger Acked-by: Ralf Baechle Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ca079c3d09e3..0bb537d7ba2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -66,6 +66,68 @@ struct old_linux_dirent; #include #include +#define __SC_DECL1(t1, a1) t1 a1 +#define __SC_DECL2(t2, a2, ...) 
t2 a2, __SC_DECL1(__VA_ARGS__) +#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__) +#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__) +#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__) +#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__) + +#define __SC_LONG1(t1, a1) long a1 +#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__) +#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__) +#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__) +#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__) +#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__) + +#define __SC_CAST1(t1, a1) (t1) a1 +#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__) +#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__) +#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__) +#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__) +#define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__) + +#define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long)) +#define __SC_TEST1(t1, a1) __SC_TEST(t1) +#define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__) +#define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__) +#define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__) +#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) +#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) + +#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#define SYSCALL_DEFINE1(...) SYSCALL_DEFINEx(1, __VA_ARGS__) +#define SYSCALL_DEFINE2(...) SYSCALL_DEFINEx(2, __VA_ARGS__) +#define SYSCALL_DEFINE3(...) SYSCALL_DEFINEx(3, __VA_ARGS__) +#define SYSCALL_DEFINE4(...) SYSCALL_DEFINEx(4, __VA_ARGS__) +#define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) +#define SYSCALL_DEFINE6(...) 
SYSCALL_DEFINEx(6, __VA_ARGS__) + +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) + +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + +#define SYSCALL_DEFINE(name) static inline long SYSC_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long SyS_##name(__SC_LONG##x(__VA_ARGS__)) \ + { \ + __SC_TEST##x(__VA_ARGS__); \ + return (long) SYSC_##name(__SC_CAST##x(__VA_ARGS__)); \ + } \ + SYSCALL_ALIAS(sys_##name, SyS_##name); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)) + +#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#define SYSCALL_DEFINE(name) asmlinkage long sys_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)) + +#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, -- cgit v1.2.3 From ee6a093222549ac0c72cfd296c69fa5e7d6daa34 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 14:14:00 +0100 Subject: [CVE-2009-0029] powerpc: Enable syscall wrappers for 64-bit This enables the use of syscall wrappers to do proper sign extension for 64-bit programs. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0bb537d7ba2e..90aa5eba87a2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -103,8 +103,14 @@ struct old_linux_dirent; #define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) #define SYSCALL_DEFINE6(...) 
SYSCALL_DEFINEx(6, __VA_ARGS__) +#ifdef CONFIG_PPC64 +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ + "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) +#else #define SYSCALL_ALIAS(alias, name) \ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) +#endif #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -- cgit v1.2.3 From d4e82042c4cfa87a7d51710b71f568fe80132551 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:34 +0100 Subject: [CVE-2009-0029] System call wrappers part 32 Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 90aa5eba87a2..56c400138b05 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -678,6 +678,13 @@ asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); +asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct timespec __user *, + void __user *); +asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, + struct timespec __user *, const sigset_t __user *, + size_t); +asmlinkage long sys_pipe2(int __user *, int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3 From 2b66421995d2e93c9d1a0111acf2581f8529c6e5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:35 +0100 Subject: [CVE-2009-0029] System call wrappers part 33 Signed-off-by: Heiko Carstens --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 56c400138b05..16875f89e6a7 100644 --- 
a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -685,6 +685,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, struct timespec __user *, const sigset_t __user *, size_t); asmlinkage long sys_pipe2(int __user *, int); +asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3 From 18e6959c385f3edf3991fa6662a53dac4eb10d5b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 14 Jan 2009 07:28:16 +0100 Subject: mm: fix assertion This assertion is incorrect for lockless pagecache. By definition if we have an unpinned page that we are trying to take a speculative reference to, it may become the tail of a compound page at any time (if it is freed, then reallocated as a compound page). It was still a valid assertion for the vmscan.c LRU isolation case, but it doesn't seem incredibly helpful... if somebody wants it, they can put it back directly where it applies in the vmscan code. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b91a73fd1bcc..e8ddc98b8405 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -260,7 +260,6 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - VM_BUG_ON(PageTail(page)); return atomic_inc_not_zero(&page->_count); } -- cgit v1.2.3 From b94b898f3107046b5c97c556e23529283ea5eadd Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Wed, 14 Jan 2009 19:19:02 +0100 Subject: it821x: Add ultra_mask quirk for Vortex86SX On Vortex86SX with IDE controller revision 0x11 ultra DMA must be disabled. This patch was tested by DMP and seems to work. 
It is a cleaned up version of their older Kernel patch: http://www.dmp.com.tw/tech/vortex86sx/patch-2.6.24-DMP.gz Tested-by: Shawn Lin Signed-off-by: Brandon Philips Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d543365518ab..d56ad9c21c09 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2174,6 +2174,7 @@ #define PCI_DEVICE_ID_RDC_R6040 0x6040 #define PCI_DEVICE_ID_RDC_R6060 0x6060 #define PCI_DEVICE_ID_RDC_R6061 0x6061 +#define PCI_DEVICE_ID_RDC_D1010 0x1010 #define PCI_VENDOR_ID_LENOVO 0x17aa -- cgit v1.2.3 From e720b9e498b6bbb1b4f3b3d2f8e9a78578aafef7 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 14 Jan 2009 19:19:04 +0100 Subject: IDE: fix sparse signed-ness errors with host->host_busy The host_busy field in struct ide_host defaults to a signed-long, where most arch's test_and_set_bit_* macros use an unsigned long. 
Change to using an unsigned long, which on ARM removes the following sparse errors: drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) 
drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * Signed-off-by: Ben Dooks Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3644f6323384..194da5a4b0d6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -871,7 +871,7 @@ struct ide_host { ide_hwif_t *cur_port; /* for hosts requiring serialization */ /* used for hosts requiring serialization */ - volatile long host_busy; + volatile unsigned long host_busy; }; #define IDE_HOST_BUSY 0 -- cgit v1.2.3 From 74d96f018673759d04d032c137d132f6447bfb1e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 13 Jan 2009 19:27:09 -0800 Subject: byteorder: make swab.h include asm/swab.h like a regular header Add swab.h to kbuild.asm and remove the individual entries from each arch, mark as unifdef as some arches have some kernel-only bits inside. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swab.h b/include/linux/swab.h index be5284d4a053..ea0c02fd5163 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -3,7 +3,7 @@ #include #include -#include +#include /* * casts are necessary for constants, because we never know how for sure -- cgit v1.2.3 From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initialize the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. 
This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, thus fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add(). Symbol is exported GPL only as I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4647604c7ca9..ec54785d34f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,6 +795,7 @@ struct net_device NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state; /* Called from unregister, can be used to call free_netdev */ @@ -1077,6 +1078,8 @@ extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int init_dummy_netdev(struct net_device *dev); + extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); -- cgit v1.2.3 From 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:59 -0800 Subject: cgroups: consolidate cgroup documents Move Documentation/cpusets.txt and Documentation/controllers/* to Documentation/cgroups/ Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Paul Menage Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index dede0a2cfc45..4c5bcf6ca7e8 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -9,7 +9,7 @@ * * Author: Pavel Emelianov * - * See Documentation/controllers/resource_counter.txt for more + * See Documentation/cgroups/resource_counter.txt for more * info about what this counter is. */ -- cgit v1.2.3 From 3eabdb76a03bbe8f556162738c264dbfb24cff6a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: jbd: fix missing kernel-doc Fix jbd header file kernel-doc notation: Warning(linux-2.6.28-git13//include/linux/jbd.h:823): No description found for parameter 'j_average_commit_time' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 6384b19efe64..64246dce5663 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -614,6 +614,8 @@ struct transaction_s * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_average_commit_time: the average amount of time in nanoseconds it + * takes to commit a transaction to the disk. * @j_private: An opaque pointer to fs-private information. 
*/ -- cgit v1.2.3 From 6ae301e85c9c58d2f430a8a7057ce488b7ff76df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: resources: fix parameter name and kernel-doc Fix __request_region() parameter kernel-doc notation and parameter name: Warning(linux-2.6.28-git10//kernel/resource.c:627): No description found for parameter 'flags' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index f6bb2ca8e3ba..32e4b2f72294 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -143,7 +143,8 @@ static inline unsigned long resource_type(struct resource *res) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name, int relaxed); + resource_size_t n, + const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) -- cgit v1.2.3 From 1bcbf31337391a2f54ef6c1e8871c2de5944a7dc Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 15 Jan 2009 13:51:03 -0800 Subject: btrfs & squashfs: Move btrfs and squashfs's magic numbers to <linux/magic.h> Use the standard magic.h for btrfs and squashfs.
Signed-off-by: Qinghuang Feng Cc: Phillip Lougher Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index 439f6f3cb0c4..0b4df7eba852 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -10,11 +10,13 @@ #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 #define TMPFS_MAGIC 0x01021994 +#define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 #define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 +#define BTRFS_SUPER_MAGIC 0x9123683E #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit v1.2.3 From 00bfddaf7f68a6551319b536f052040c370756b0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 15 Jan 2009 13:51:26 -0800 Subject: include of <linux/types.h> is preferred over <asm/types.h> Impact: fix 15 make headers_check warnings: include of <linux/types.h> is preferred over <asm/types.h> Signed-off-by: Jaswinder Singh Rajput Cc: Ingo Molnar Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/agpgart.h | 1 - include/linux/atm_idt77105.h | 2 +- include/linux/capi.h | 2 +- include/linux/connector.h | 2 +- include/linux/cyclades.h | 2 -- include/linux/fb.h | 2 +- include/linux/if_pppol2tp.h | 2 +- include/linux/if_pppox.h | 2 +- include/linux/input.h | 2 +- include/linux/joystick.h | 2 +- include/linux/kvm.h | 2 +- include/linux/loop.h | 2 +- include/linux/matroxfb.h | 2 +- include/linux/phantom.h | 2 +- include/linux/radeonfb.h | 2 +- 15 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h index c8fdb6e658e1..110c600c885f 100644 --- a/include/linux/agpgart.h +++ b/include/linux/agpgart.h @@ -52,7 +52,6 @@ #ifndef __KERNEL__ #include -#include
struct agp_version { __u16 major; diff --git a/include/linux/atm_idt77105.h b/include/linux/atm_idt77105.h index 05621cf20709..8b724000aa50 100644 --- a/include/linux/atm_idt77105.h +++ b/include/linux/atm_idt77105.h @@ -7,7 +7,7 @@ #ifndef LINUX_ATM_IDT77105_H #define LINUX_ATM_IDT77105_H -#include +#include #include #include diff --git a/include/linux/capi.h b/include/linux/capi.h index fdebaaa9f66e..65100d6cb89b 100644 --- a/include/linux/capi.h +++ b/include/linux/capi.h @@ -12,7 +12,7 @@ #ifndef __LINUX_CAPI_H__ #define __LINUX_CAPI_H__ -#include +#include #include #ifndef __KERNEL__ #include diff --git a/include/linux/connector.h b/include/linux/connector.h index 5c7f9468f753..34f2789d9b9b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -22,7 +22,7 @@ #ifndef __CONNECTOR_H #define __CONNECTOR_H -#include +#include #define CN_IDX_CONNECTOR 0xffffffff #define CN_VAL_CONNECTOR 0xffffffff diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 2d3d1e04ba92..d06fbf286346 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -150,8 +150,6 @@ struct CYZ_BOOT_CTRL { * architectures and compilers. 
*/ -#include - typedef __u64 ucdouble; /* 64 bits, unsigned */ typedef __u32 uclong; /* 32 bits, unsigned */ typedef __u16 ucshort; /* 16 bits, unsigned */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 1ee63df5be92..818fe21257e8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1,7 +1,7 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include +#include #include struct dentry; diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h index a7d6a2234b31..c7a66882b6d0 100644 --- a/include/linux/if_pppol2tp.h +++ b/include/linux/if_pppol2tp.h @@ -15,7 +15,7 @@ #ifndef __LINUX_IF_PPPOL2TP_H #define __LINUX_IF_PPPOL2TP_H -#include +#include #ifdef __KERNEL__ #include diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 6fb7f1788570..30c88b2245ff 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -17,7 +17,7 @@ #define __LINUX_IF_PPPOX_H -#include +#include #include #ifdef __KERNEL__ diff --git a/include/linux/input.h b/include/linux/input.h index 9a6355f74db2..1249a0c20a38 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #endif /* diff --git a/include/linux/joystick.h b/include/linux/joystick.h index b5e051295a67..9e20c29c1e14 100644 --- a/include/linux/joystick.h +++ b/include/linux/joystick.h @@ -27,7 +27,7 @@ * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic */ -#include +#include #include /* diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 35525ac63337..5715f1907601 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -7,7 +7,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. 
*/ -#include +#include #include #include #include diff --git a/include/linux/loop.h b/include/linux/loop.h index 46169a7b559b..6ffd6db5bb0d 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -80,7 +80,7 @@ enum { }; #include /* for __kernel_old_dev_t */ -#include /* for __u64 */ +#include /* for __u64 */ /* Backwards compatibility version */ struct loop_info { diff --git a/include/linux/matroxfb.h b/include/linux/matroxfb.h index ae5b09493062..404f678e734b 100644 --- a/include/linux/matroxfb.h +++ b/include/linux/matroxfb.h @@ -2,7 +2,7 @@ #define __LINUX_MATROXFB_H__ #include -#include +#include #include struct matroxioc_output_mode { diff --git a/include/linux/phantom.h b/include/linux/phantom.h index 02268c54c250..94dd6645c60a 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -10,7 +10,7 @@ #ifndef __PHANTOM_H #define __PHANTOM_H -#include +#include /* PHN_(G/S)ET_REG param */ struct phm_reg { diff --git a/include/linux/radeonfb.h b/include/linux/radeonfb.h index 5bd8975ed78e..8c4bbdecc44f 100644 --- a/include/linux/radeonfb.h +++ b/include/linux/radeonfb.h @@ -2,7 +2,7 @@ #define __LINUX_RADEONFB_H__ #include -#include +#include #define ATY_RADEON_LCD_ON 0x00000001 #define ATY_RADEON_CRT_ON 0x00000002 -- cgit v1.2.3