From cf1c4a094f46ace5bf00078e2db73491ddf018fe Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 19 Feb 2013 11:35:59 -0800 Subject: netfilter: ipset: timeout values corrupted on set resize If a resize is triggered on a set with timeouts enabled, the timeout values will get corrupted when copying them to the new set. This occured b/c the wrong timeout value is supplied to type_pf_elem_tadd(). This also adds simple debug statement similar to the one in type_pf_resize(). Signed-off-by: Josh Hunt Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_ahash.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index ef9acd3c8450..01d25e6fc792 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -854,6 +854,8 @@ type_pf_tresize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); if (!htable_bits) { /* In case we have plenty of memory :-) */ pr_warning("Cannot increase the hashsize of set %s further\n", @@ -873,7 +875,7 @@ retry: data = ahash_tdata(n, j); m = hbucket(t, HKEY(data, h->initval, htable_bits)); ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - type_pf_data_timeout(data)); + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { read_unlock_bh(&set->lock); ahash_destroy(t); -- cgit v1.2.3 From 4d4c4e24cf48400a24d33feffc7cca4f4e8cabe1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 22 Feb 2013 00:05:07 +0100 Subject: irq: Remove IRQ_EXIT_OFFSET workaround The IRQ_EXIT_OFFSET trick was used to make sure the irq doesn't get preempted after we substract the HARDIRQ_OFFSET until we are entirely done with any code in irq_exit(). This workaround was necessary because some archs may call irq_exit() with irqs enabled and there is still some code in the end of this function that is not covered by the HARDIRQ_OFFSET but want to stay non-preemptible. Now that irq are always disabled in irq_exit(), the whole code is guaranteed not to be preempted. We can thus remove this hack. Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul E. McKenney --- include/linux/hardirq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 29eb805ea4a6..c1d6555d2567 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,10 +118,8 @@ #ifdef CONFIG_PREEMPT_COUNT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) -# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) #else # define preemptible() 0 -# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET #endif #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) -- cgit v1.2.3 From 46c498c2cdee5efe44f617bcd4f388179be36115 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Feb 2013 18:44:33 +0100 Subject: stop_machine: Mark per cpu stopper enabled early commit 14e568e78 (stop_machine: Use smpboot threads) introduced the following regression: Before this commit the stopper enabled bit was set in the online notifier. CPU0 CPU1 cpu_up cpu online hotplug_notifier(ONLINE) stopper(CPU1)->enabled = true; ... stop_machine() The conversion to smpboot threads moved the enablement to the wakeup path of the parked thread. The majority of users seem to have the following working order: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stopper thread runs stopper(CPU1)->enabled = true; stop_machine() But Konrad and Sander have observed: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stop_machine() stopper thread runs stopper(CPU1)->enabled = true; Now the stop machinery kicks CPU0 into the stop loop, where it gets stuck forever because the queue code saw stopper(CPU1)->enabled == false, so CPU0 waits for CPU1 to enter stomp_machine, but the CPU1 stopper work got discarded due to enabled == false. Add a pre_unpark function to the smpboot thread descriptor and call it before waking the thread. This fixes the problem at hand, but the stop_machine code should be more robust. The stopper->enabled flag smells fishy at best. Thanks to Konrad for going through a loop of debug patches and providing the information to decode this issue. Reported-and-tested-by: Konrad Rzeszutek Wilk Reported-and-tested-by: Sander Eikelenboom Cc: Srivatsa S. Bhat Cc: Rusty Russell Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1302261843240.22263@ionos Signed-off-by: Thomas Gleixner --- include/linux/smpboot.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index c65dee059913..13e929679550 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,6 +24,9 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @pre_unpark: Optional unpark function, called before the thread is + * unparked (cpu online). This is not guaranteed to be + * called on the target cpu of the thread. Careful! * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -37,6 +40,7 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + void (*pre_unpark)(unsigned int cpu); bool selfparking; const char *thread_comm; }; -- cgit v1.2.3 From 79ffef1fe213851f44bfccf037170a140e929f85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Feb 2013 07:05:03 +0000 Subject: tcp: avoid wakeups for pure ACK TCP prequeue mechanism purpose is to let incoming packets being processed by the thread currently blocked in tcp_recvmsg(), instead of behalf of the softirq handler, to better adapt flow control on receiver host capacity to schedule the consumer. But in typical request/answer workloads, we send request, then block to receive the answer. And before the actual answer, TCP stack receives the ACK packets acknowledging the request. Processing pure ACK on behalf of the thread blocked in tcp_recvmsg() is a waste of resources, as thread has to immediately sleep again because it got no payload. This patch avoids the extra context switches and scheduler overhead. Before patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 231676 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 116251.501765 task-clock # 11.369 CPUs utilized 5,025,463 context-switches # 0.043 M/sec 1,074,511 CPU-migrations # 0.009 M/sec 216,923 page-faults # 0.002 M/sec 311,636,972,396 cycles # 2.681 GHz 260,507,138,069 stalled-cycles-frontend # 83.59% frontend cycles idle 155,590,092,840 stalled-cycles-backend # 49.93% backend cycles idle 100,101,255,411 instructions # 0.32 insns per cycle # 2.60 stalled cycles per insn 16,535,930,999 branches # 142.243 M/sec 646,483,591 branch-misses # 3.91% of all branches 10.225482774 seconds time elapsed After patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 233297 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 91084.870855 task-clock # 8.887 CPUs utilized 2,485,916 context-switches # 0.027 M/sec 815,520 CPU-migrations # 0.009 M/sec 216,932 page-faults # 0.002 M/sec 245,195,022,629 cycles # 2.692 GHz 202,635,777,041 stalled-cycles-frontend # 82.64% frontend cycles idle 124,280,372,407 stalled-cycles-backend # 50.69% backend cycles idle 83,457,289,618 instructions # 0.34 insns per cycle # 2.43 stalled cycles per insn 13,431,472,361 branches # 147.461 M/sec 504,470,665 branch-misses # 3.76% of all branches 10.249594448 seconds time elapsed Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Cc: Andi Kleen Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 23f2e98d4b65..cf0694d4ad60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1045,6 +1045,10 @@ static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (sysctl_tcp_low_latency || !tp->ucopy.task) return false; + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; if (tp->ucopy.memory > sk->sk_rcvbuf) { -- cgit v1.2.3 From 5838b032fd69ae47565ddc50062decf9055e1628 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 28 Feb 2013 18:12:47 -0600 Subject: regulator: core: update kernel documentation for regulator_desc commit df367931 (regulator: core: Provide regmap get/set bypass operations) introduced regulator_[gs]et_bypass_regmap However structure documentation for regulator_desc needs an update. ./scripts/kernel-doc include/linux/regulator/driver.h >/dev/null generates: Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_reg' Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_mask' Cc: Liam Girdwood Cc: Mark Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 23070fd83872..7df93f52db08 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -199,6 +199,8 @@ enum regulator_type { * output when using regulator_set_voltage_sel_regmap * @enable_reg: Register for control when using regmap enable/disable ops * @enable_mask: Mask for control when using regmap enable/disable ops + * @bypass_reg: Register for control when using regmap set_bypass + * @bypass_mask: Mask for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). */ -- cgit v1.2.3 From 7f78e0351394052e1a6293e175825eb5c7869507 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 2 Mar 2013 19:39:14 -0800 Subject: fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as it's module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant being autofs that lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regards to the users permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. Which means there is not much attack surface exposed by loading a filesytem module unless the filesystem is mounted. In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn Acked-by: Kees Cook Reported-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 74a907b8b950..2c28271ab9d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,6 +1825,8 @@ struct file_system_type { struct lock_class_key i_mutex_dir_key; }; +#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) + extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, -- cgit v1.2.3 From 290502bee239062499297916bb7d21d205e99d62 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Feb 2013 00:39:37 -0800 Subject: eCryptfs: allow userspace messaging to be disabled When the userspace messaging (for the less common case of userspace key wrap/unwrap via ecryptfsd) is not needed, allow eCryptfs to build with it removed. This saves on kernel code size and reduces potential attack surface by removing the /dev/ecryptfs node. Signed-off-by: Kees Cook Signed-off-by: Tyler Hicks --- include/linux/ecryptfs.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h index 2224a8c0cb64..8d5ab998a222 100644 --- a/include/linux/ecryptfs.h +++ b/include/linux/ecryptfs.h @@ -6,9 +6,8 @@ #define ECRYPTFS_VERSION_MINOR 0x04 #define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 /* These flags indicate which features are supported by the kernel - * module; userspace tools such as the mount helper read - * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine - * how to behave. */ + * module; userspace tools such as the mount helper read the feature + * bits from a sysfs handle in order to determine how to behave. */ #define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 #define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 @@ -19,13 +18,6 @@ #define ECRYPTFS_VERSIONING_HMAC 0x00000080 #define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 #define ECRYPTFS_VERSIONING_GCM 0x00000200 -#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ - | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ - | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC \ - | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 -- cgit v1.2.3 From 53540098b23c3884b4a0b4f220b9d977bc496af3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2013 22:35:20 +0100 Subject: ACPI / glue: Add .match() callback to struct acpi_bus_type USB uses the .find_bridge() callback from struct acpi_bus_type incorrectly, because as a result of the way it is used by USB every device in the system that doesn't have a bus type or parent is passed to usb_acpi_find_device() for inspection. What USB actually needs, though, is to call usb_acpi_find_device() for USB ports that don't have a bus type defined, but have usb_port_device_type as their device type, as well as for USB devices. To fix that replace the struct bus_type pointer in struct acpi_bus_type used for matching devices to specific subsystems with a .match() callback to be used for this purpose and update the users of struct acpi_bus_type, including USB, accordingly. Define the .match() callback routine for USB, usb_acpi_bus_match(), in such a way that it will cover both USB devices and USB ports and remove the now redundant .find_bridge() callback pointer from usb_acpi_bus. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- include/acpi/acpi_bus.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e65278f560c4..c751d7de3a5f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -437,7 +437,8 @@ void acpi_remove_dir(struct acpi_device *); */ struct acpi_bus_type { struct list_head list; - struct bus_type *bus; + const char *name; + bool (*match)(struct device *dev); /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); /* For bridges, such as PCI root bridge, IDE controller */ -- cgit v1.2.3 From 924144818cf0edc5d9d70d3a44e7cbbf4544796c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2013 22:35:44 +0100 Subject: ACPI / glue: Drop .find_bridge() callback from struct acpi_bus_type After PCI and USB have stopped using the .find_bridge() callback in struct acpi_bus_type, the only remaining user of it is SATA, but SATA only pretends to be a user, because it points that callback to a stub always returning -ENODEV. For this reason, drop the SATA's dummy .find_bridge() callback and remove .find_bridge(), which is not used any more, from struct acpi_bus_type entirely. Signed-off-by: Rafael J. Wysocki Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- include/acpi/acpi_bus.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c751d7de3a5f..22ba56e834e2 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -439,10 +439,7 @@ struct acpi_bus_type { struct list_head list; const char *name; bool (*match)(struct device *dev); - /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); - /* For bridges, such as PCI root bridge, IDE controller */ - int (*find_bridge) (struct device *, acpi_handle *); void (*setup)(struct device *); void (*cleanup)(struct device *); }; -- cgit v1.2.3 From 13bcf01b33e6e19a7fe7ff396f9ed02803e225ec Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Thu, 7 Mar 2013 10:42:25 -0500 Subject: drm: Documentation typo fixes Signed-off-by: Christopher Harvey Signed-off-by: Dave Airlie --- include/drm/drm_crtc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 8839b3a24660..e3e0d651c6ca 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -443,12 +443,12 @@ struct drm_crtc { * @dpms: set power state (see drm_crtc_funcs above) * @save: save connector state * @restore: restore connector state - * @reset: reset connector after state has been invalidate (e.g. resume) + * @reset: reset connector after state has been invalidated (e.g. resume) * @detect: is this connector active? * @fill_modes: fill mode list for this connector - * @set_property: property for this connector may need update + * @set_property: property for this connector may need an update * @destroy: make object go away - * @force: notify the driver the connector is forced on + * @force: notify the driver that the connector is forced on * * Each CRTC may have one or more connectors attached to it. The functions * below allow the core DRM code to control connectors, enumerate available modes, -- cgit v1.2.3 From 22dfab7fd7fd5a8a2c5556ca0a8fd35fc959abc8 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 7 Mar 2013 19:43:33 -0800 Subject: Input: atmel_mxt_ts - Support for touchpad variant This same driver can be used by atmel based touchscreens and touchpads (buttonpads). Platform data may specify a device is a touchpad using the is_tp flag. This will cause the driver to perform some touchpad specific initializations, such as: * register input device name "Atmel maXTouch Touchpad" instead of Touchscreen. * register BTN_LEFT & BTN_TOOL_* event types. * register axis resolution (as a fixed constant, for now) * register BUTTONPAD property * process GPIO buttons using reportid T19 Input event GPIO mapping is done by the platform data key_map array. key_map[x] should contain the KEY or BTN code to send when processing GPIOx from T19. To specify a GPIO as not an input source, populate with KEY_RESERVED, or 0. Signed-off-by: Daniel Kurtz Signed-off-by: Benson Leung Signed-off-by: Nick Dyer Tested-by: Olof Johansson Signed-off-by: Linus Torvalds --- include/linux/i2c/atmel_mxt_ts.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index f027f7a63511..99e379b74398 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,6 +15,9 @@ #include +/* For key_map array */ +#define MXT_NUM_GPIO 4 + /* Orient */ #define MXT_NORMAL 0x0 #define MXT_DIAGONAL 0x1 @@ -39,6 +42,8 @@ struct mxt_platform_data { unsigned int voltage; unsigned char orient; unsigned long irqflags; + bool is_tp; + const unsigned int key_map[MXT_NUM_GPIO]; }; #endif /* __LINUX_ATMEL_MXT_TS_H */ -- cgit v1.2.3 From 49d0de082c31de34cc896c14eec5f1c2ade0415a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Feb 2013 16:42:34 -0800 Subject: rcu: Fix hlist_bl_set_first_rcu() annotation Abhi noticed that we were getting a complaint from the RCU subsystem about access of an RCU protected list under the write side bit lock. This commit adds additional annotation to check both the RCU read lock and the write side bit lock before printing a message. Reported by: Abhijith Das Signed-off-by: Steven Whitehouse Tested-by: Abhijith Das Signed-off-by: Paul E. McKenney --- include/linux/list_bl.h | 5 +++++ include/linux/rculist_bl.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 31f9d75adc5b..2eb88556c5c5 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -125,6 +125,11 @@ static inline void hlist_bl_unlock(struct hlist_bl_head *b) __bit_spin_unlock(0, (unsigned long *)b); } +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) +{ + return bit_spin_is_locked(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index cf1244fbf3b6..4f216c59e7db 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -20,7 +20,7 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); + ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** -- cgit v1.2.3 From 09c7b890622d72b5e004cc249bbe610e8b928ddf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Feb 2013 15:55:02 -0800 Subject: rcu: Add event tracing for no-CBs CPUs' grace periods Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 1918e832da4f..cdfed6d386eb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -71,6 +71,58 @@ TRACE_EVENT(rcu_grace_period, __entry->rcuname, __entry->gpnum, __entry->gpevent) ); +/* + * Tracepoint for no-callbacks grace-period events. The caller should + * pull the data from the rcu_node structure, other than rcuname, which + * comes from the rcu_state structure, and event, which is one of the + * following: + * + * "Startleaf": Request a nocb grace period based on leaf-node data. + * "Startedleaf": Leaf-node start proved sufficient. + * "Startedleafroot": Leaf-node start proved sufficient after checking root. + * "Startedroot": Requested a nocb grace period based on root-node data. + * "StartWait": Start waiting for the requested grace period. + * "ResumeWait": Resume waiting after signal. + * "EndWait": Complete wait. + * "Cleanup": Clean up rcu_node structure after previous GP. + * "CleanupMore": Clean up, and another no-CB GP is needed. + */ +TRACE_EVENT(rcu_nocb_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, + unsigned long c, u8 level, int grplo, int grphi, + char *gpevent), + + TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, completed) + __field(unsigned long, c) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->completed = completed; + __entry->c = c; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %lu %lu %u %d %d %s", + __entry->rcuname, __entry->gpnum, __entry->completed, + __entry->c, __entry->level, __entry->grplo, __entry->grphi, + __entry->gpevent) +); + /* * Tracepoint for grace-period-initialization events. These are * distinguished by the type of RCU, the new grace-period number, the @@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) +#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ -- cgit v1.2.3 From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now take advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define ulong2long(a) (*(long *)(&(a))) /* Exported common interfaces */ -- cgit v1.2.3 From bd9f0686fc8c9a01c6850b1c611d1c9ad80b86d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 21:51:20 -0800 Subject: rcu: Repurpose no-CBs event tracing to future-GP events Dyntick-idle CPUs need to be able to pre-announce their need for grace periods. This can be done using something similar to the mechanism used by no-CB CPUs to announce their need for grace periods. This commit moves in this direction by renaming the no-CBs grace-period event tracing to suit the new future-grace-period needs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index cdfed6d386eb..59ebcc89f148 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -72,10 +72,10 @@ TRACE_EVENT(rcu_grace_period, ); /* - * Tracepoint for no-callbacks grace-period events. The caller should - * pull the data from the rcu_node structure, other than rcuname, which - * comes from the rcu_state structure, and event, which is one of the - * following: + * Tracepoint for future grace-period events, including those for no-callbacks + * CPUs. The caller should pull the data from the rcu_node structure, + * other than rcuname, which comes from the rcu_state structure, and event, + * which is one of the following: * * "Startleaf": Request a nocb grace period based on leaf-node data. * "Startedleaf": Leaf-node start proved sufficient. @@ -87,7 +87,7 @@ TRACE_EVENT(rcu_grace_period, * "Cleanup": Clean up rcu_node structure after previous GP. * "CleanupMore": Clean up, and another no-CB GP is needed. */ -TRACE_EVENT(rcu_nocb_grace_period, +TRACE_EVENT(rcu_future_grace_period, TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, unsigned long c, u8 level, int grplo, int grphi, @@ -653,9 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) -#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ - level, grplo, grphi, event) \ - do { } while (0) +#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ -- cgit v1.2.3