From abd54f028ec30976d6e797e7474ec91d96186a0c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:55 -0500 Subject: kernfs: replace kernfs_node->u.completion with kernfs_root->deactivate_waitq kernfs_node->u.completion is used to notify deactivation completion from kernfs_put_active() to kernfs_deactivate(). We now allow multiple racing removals of the same node and the current removal scheme is no longer correct - kernfs_remove() invocation may return before the node is properly deactivated if it races against another removal. The removal path will be restructured to address the issue. To help such restructure which requires supporting multiple waiters, this patch replaces kernfs_node->u.completion with kernfs_root->deactivate_waitq. This makes deactivation event notifications share a per-root waitqueue_head; however, the wait path is quite cold and this will also allow shaving one pointer off kernfs_node. v2: Refreshed on top of ("kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag"). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5be9f0228a3b..295a3bf642ba 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include struct file; struct dentry; @@ -92,7 +92,6 @@ struct kernfs_node { struct rb_node rb; union { - struct completion *completion; struct kernfs_node *removed_list; } u; @@ -133,6 +132,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct ida ino_ida; struct kernfs_dir_ops *dir_ops; + wait_queue_head_t deactivate_waitq; }; struct kernfs_open_file { -- cgit v1.2.3 From 988cd7afb3f37598891ca70b4c6eb914c338c46a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:58 -0500 Subject: kernfs: remove kernfs_addrm_cxt kernfs_addrm_cxt and the accompanying kernfs_addrm_start/finish() were added because there were operations which should be performed outside kernfs_mutex after adding and removing kernfs_nodes. The necessary operations were recorded in kernfs_addrm_cxt and performed by kernfs_addrm_finish(); however, after the recent changes which relocated deactivation and unmapping so that they're performed directly during removal, the only operation kernfs_addrm_finish() performs is kernfs_put(), which can be moved inside the removal path too. This patch moves the kernfs_put() of the base ref to __kernfs_remove() and remove kernfs_addrm_cxt and kernfs_addrm_start/finish(). * kernfs_add_one() is updated to grab and release kernfs_mutex itself. sysfs_addrm_start/finish() invocations around it are removed from all users. * __kernfs_remove() puts an unlinked node directly instead of chaining it to kernfs_addrm_cxt. Its callers are updated to grab and release kernfs_mutex instead of calling kernfs_addrm_start/finish() around it. v2: Rebased on top of "kernfs: associate a new kernfs_node with its parent on creation" which dropped @parent from kernfs_add_one(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 295a3bf642ba..38646f6096bc 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -91,10 +91,6 @@ struct kernfs_node { struct rb_node rb; - union { - struct kernfs_node *removed_list; - } u; - const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ union { -- cgit v1.2.3 From 182fd64b66342219d6fcf2b84d337529d120d95c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:02:59 -0500 Subject: kernfs: remove KERNFS_ACTIVE_REF and add kernfs_lockdep() There currently are two mechanisms gating active ref lockdep annotations - KERNFS_LOCKDEP flag and KERNFS_ACTIVE_REF type mask. The former disables lockdep annotations in kernfs_get/put_active() while the latter disables all of kernfs_deactivate(). While KERNFS_ACTIVE_REF also behaves as an optimization to skip the deactivation step for non-file nodes, the benefit is marginal and it needlessly diverges code paths. Let's drop KERNFS_ACTIVE_REF. While at it, add a test helper kernfs_lockdep() to test KERNFS_LOCKDEP flag so that it's more convenient and the related code can be compiled out when not enabled. v2: Refreshed on top of ("kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag"). As the earlier patch already added KERNFS_LOCKDEP tests to kernfs_deactivate(), those additions are dropped from this patch and the existing ones are simply converted to kernfs_lockdep(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 38646f6096bc..dc4cd6c04236 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -35,7 +35,6 @@ enum kernfs_node_type { }; #define KERNFS_TYPE_MASK 0x000f -#define KERNFS_ACTIVE_REF KERNFS_FILE #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { -- cgit v1.2.3 From 81c173cb5e87fbb47ccd80630faefe39bbf68449 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:00 -0500 Subject: kernfs: remove KERNFS_REMOVED KERNFS_REMOVED is used to mark half-initialized and dying nodes so that they don't show up in lookups and deny adding new nodes under or renaming it; however, its role overlaps that of deactivation. It's necessary to deny addition of new children while removal is in progress; however, this role considerably intersects with deactivation - KERNFS_REMOVED prevents new children while deactivation prevents new file operations. There's no reason to have them separate making things more complex than necessary. This patch removes KERNFS_REMOVED. * Instead of KERNFS_REMOVED, each node now starts its life deactivated. This means that we now use both atomic_add() and atomic_sub() on KN_DEACTIVATED_BIAS, which is INT_MIN. The compiler generates an overflow warnings when negating INT_MIN as the negation can't be represented as a positive number. Nothing is actually broken but let's bump BIAS by one to avoid the warnings for archs which negates the subtrahend.. * A new helper kernfs_active() which tests whether kn->active >= 0 is added for convenience and lockdep annotation. All KERNFS_REMOVED tests are replaced with negated kernfs_active() tests. * __kernfs_remove() is updated to deactivate, but not drain, all nodes in the subtree instead of setting KERNFS_REMOVED. This removes deactivation from kernfs_deactivate(), which is now renamed to kernfs_drain(). * Sanity check on KERNFS_REMOVED in kernfs_put() is replaced with checks on the active ref. * Some comment style updates in the affected area. v2: Reordered before removal path restructuring. kernfs_active() dropped and kernfs_get/put_active() used instead. RB_EMPTY_NODE() used in the lookup paths. v3: Reverted most of v2 except for creating a new node with KN_DEACTIVATED_BIAS. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index dc4cd6c04236..917bc6c1eb04 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -38,7 +38,6 @@ enum kernfs_node_type { #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { - KERNFS_REMOVED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, -- cgit v1.2.3 From 6b0afc2a21726b2d6b6aa441af40cafaf5405cc8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:01 -0500 Subject: kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immmediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchrnous. All that's necessary to do this synchronous is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding the normal removal path. kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo Cc: Alan Stern Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ include/linux/kernfs.h | 8 ++++++++ include/linux/sysfs.h | 7 +++++++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 952b01033c32..1ff3f1697513 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -560,6 +560,8 @@ extern int device_create_file(struct device *device, const struct device_attribute *entry); extern void device_remove_file(struct device *dev, const struct device_attribute *attr); +extern bool device_remove_file_self(struct device *dev, + const struct device_attribute *attr); extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 917bc6c1eb04..02ac33435808 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -43,6 +43,8 @@ enum kernfs_node_flag { KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_STATIC_NAME = 0x0200, + KERNFS_SUICIDAL = 0x0400, + KERNFS_SUICIDED = 0x0800, }; /* type-specific structures for kernfs_node union members */ @@ -234,6 +236,9 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_remove(struct kernfs_node *kn); +void kernfs_break_active_protection(struct kernfs_node *kn); +void kernfs_unbreak_active_protection(struct kernfs_node *kn); +bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, @@ -291,6 +296,9 @@ kernfs_create_link(struct kernfs_node *parent, const char *name, static inline void kernfs_remove(struct kernfs_node *kn) { } +static inline bool kernfs_remove_self(struct kernfs_node *kn) +{ return false; } + static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b2ebee6439..bd96c603ab6c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -198,6 +198,7 @@ int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); +bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, @@ -301,6 +302,12 @@ static inline void sysfs_remove_file_ns(struct kobject *kobj, { } +static inline bool sysfs_remove_file_self(struct kobject *kobj, + const struct attribute *attr) +{ + return false; +} + static inline void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr) { -- cgit v1.2.3 From ce8b04aa6c9bdf211b921fdd18c040ea29516b97 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:03:05 -0500 Subject: sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner() All device_schedule_callback_owner() users are converted to use device_remove_file_self(). Remove now unused {sysfs|device}_schedule_callback_owner(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 11 +---------- include/linux/sysfs.h | 9 --------- 2 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1ff3f1697513..fb1ba13f7665 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -566,12 +566,6 @@ extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -extern int device_schedule_callback_owner(struct device *dev, - void (*func)(struct device *dev), struct module *owner); - -/* This is a macro to avoid include problems with THIS_MODULE */ -#define device_schedule_callback(dev, func) \ - device_schedule_callback_owner(dev, func, THIS_MODULE) /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); @@ -931,10 +925,7 @@ extern int device_online(struct device *dev); extern struct device *__root_device_register(const char *name, struct module *owner); -/* - * This is a macro to avoid include problems with THIS_MODULE, - * just as per what is done for device_schedule_callback() above. - */ +/* This is a macro to avoid include problems with THIS_MODULE */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index bd96c603ab6c..14df05415af9 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,9 +178,6 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), - void *data, struct module *owner); - int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, @@ -249,12 +246,6 @@ int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ -static inline int sysfs_schedule_callback(struct kobject *kobj, - void (*func)(void *), void *data, struct module *owner) -{ - return -ENOSYS; -} - static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; -- cgit v1.2.3 From 07c7530dd46728e25e938d0eb291f8085435c365 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:08 -0500 Subject: kernfs: invoke dir_ops while holding active ref of the target node kernfs_dir_ops are currently being invoked without any active reference, which makes it tricky for the invoked operations to determine whether the objects associated those nodes are safe to access and will remain that way for the duration of such operations. kernfs already has active_ref mechanism to deal with this which makes the removal of a given node the synchronization point for gating the file operations. There's no reason for dir_ops to be any different. Update the dir_ops handling so that active_ref is held while the dir_ops are executing. This guarantees that while a dir_ops is executing the target nodes stay alive. As kernfs_dir_ops doesn't have any in-kernel user at this point, this doesn't affect anybody. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 02ac33435808..58a131ddc6a3 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -111,7 +111,8 @@ struct kernfs_node { * kernfs_dir_ops may be specified on kernfs_create_root() to support * directory manipulation syscalls. These optional callbacks are invoked * on the matching syscalls and can perform any kernfs operations which - * don't necessarily have to be the exact operation requested. + * don't necessarily have to be the exact operation requested. An active + * reference is held for each kernfs_node parameter. */ struct kernfs_dir_ops { int (*mkdir)(struct kernfs_node *parent, const char *name, -- cgit v1.2.3 From 90c07c895c87d38db100b6afcb686ab3ef0d6a64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:09 -0500 Subject: kernfs: rename kernfs_dir_ops to kernfs_syscall_ops We're gonna need non-dir syscall callbacks, which will make dir_ops a misnomer. Let's rename kernfs_dir_ops to kernfs_syscall_ops. This is pure rename. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 58a131ddc6a3..5ddc47450335 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -108,13 +108,13 @@ struct kernfs_node { }; /* - * kernfs_dir_ops may be specified on kernfs_create_root() to support - * directory manipulation syscalls. These optional callbacks are invoked - * on the matching syscalls and can perform any kernfs operations which - * don't necessarily have to be the exact operation requested. An active - * reference is held for each kernfs_node parameter. + * kernfs_syscall_ops may be specified on kernfs_create_root() to support + * syscalls. These optional callbacks are invoked on the matching syscalls + * and can perform any kernfs operations which don't necessarily have to be + * the exact operation requested. An active reference is held for each + * kernfs_node parameter. */ -struct kernfs_dir_ops { +struct kernfs_syscall_ops { int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); @@ -128,7 +128,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct ida ino_ida; - struct kernfs_dir_ops *dir_ops; + struct kernfs_syscall_ops *syscall_ops; wait_queue_head_t deactivate_waitq; }; @@ -219,7 +219,7 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); -struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, +struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv); void kernfs_destroy_root(struct kernfs_root *root); @@ -273,7 +273,7 @@ static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_root * -kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) +kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } -- cgit v1.2.3 From 6a7fed4eefddad48224f1c9d534b4e262f0897f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:10 -0500 Subject: kernfs: implement kernfs_syscall_ops->remount_fs() and ->show_options() Add two super_block related syscall callbacks ->remount_fs() and ->show_options() to kernfs_syscall_ops. These simply forward the matching super_operations. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5ddc47450335..5d5b7e947294 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -115,6 +115,9 @@ struct kernfs_node { * kernfs_node parameter. */ struct kernfs_syscall_ops { + int (*remount_fs)(struct kernfs_root *root, int *flags, char *data); + int (*show_options)(struct seq_file *sf, struct kernfs_root *root); + int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); -- cgit v1.2.3 From d35258ef702cca0c4e66d799f8e38b78c02ce8a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:12 -0500 Subject: kernfs: allow nodes to be created in the deactivated state Currently, kernfs_nodes are made visible to userland on creation, which makes it difficult for kernfs users to atomically succeed or fail creation of multiple nodes. In addition, if something fails after creating some nodes, the created nodes might already be in use and their active refs need to be drained for removal, which has the potential to introduce tricky reverse locking dependency on active_ref depending on how the error path is synchronized. This patch introduces per-root flag KERNFS_ROOT_CREATE_DEACTIVATED. If set, all nodes under the root are created in the deactivated state and stay invisible to userland until explicitly enabled by the new kernfs_activate() API. Also, nodes which have never been activated are guaranteed to bypass draining on removal thus allowing error paths to not worry about lockding dependency on active_ref draining. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5d5b7e947294..4520c86f5cb4 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -38,6 +38,7 @@ enum kernfs_node_type { #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK enum kernfs_node_flag { + KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, @@ -47,6 +48,11 @@ enum kernfs_node_flag { KERNFS_SUICIDED = 0x0800, }; +/* @flags for kernfs_create_root() */ +enum kernfs_root_flag { + KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, +}; + /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; @@ -128,6 +134,7 @@ struct kernfs_syscall_ops { struct kernfs_root { /* published fields */ struct kernfs_node *kn; + unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ struct ida ino_ida; @@ -223,7 +230,7 @@ void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, - void *priv); + unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, @@ -239,6 +246,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); +void kernfs_activate(struct kernfs_node *kn); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); @@ -276,7 +284,8 @@ static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_root * -kernfs_create_root(struct kernfs_syscall_ops *scops, void *priv) +kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, + void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } @@ -298,6 +307,8 @@ kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } +static inline void kernfs_activate(struct kernfs_node *kn) { } + static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) -- cgit v1.2.3 From 4d3773c4bb41ed5228f1ab7a4a52b79e17b10515 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:13 -0500 Subject: kernfs: implement kernfs_ops->atomic_write_len A write to a kernfs_node is buffered through a kernel buffer. Writes <= PAGE_SIZE are performed atomically, while larger ones are executed in PAGE_SIZE chunks. While this is enough for sysfs, cgroup which is scheduled to be converted to use kernfs needs a bit more control over it. This patch adds kernfs_ops->atomic_write_len. If not set (zero), the behavior stays the same. If set, writes upto the size are executed atomically and larger writes are rejected with -E2BIG. A different implementation strategy would be allowing configuring chunking size while making the original write size available to the write method; however, such strategy, while being more complicated, doesn't really buy anything. If the write implementation has to handle chunking, the specific chunk size shouldn't matter all that much. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4520c86f5cb4..47f5235a097a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -178,9 +178,13 @@ struct kernfs_ops { loff_t off); /* - * write() is bounced through kernel buffer and a write larger than - * PAGE_SIZE results in partial operation of PAGE_SIZE. + * write() is bounced through kernel buffer. If atomic_write_len + * is not set, a write larger than PAGE_SIZE results in partial + * operations of PAGE_SIZE chunks. If atomic_write_len is set, + * writes upto the specified size are executed atomically but + * larger ones are rejected with -E2BIG. */ + size_t atomic_write_len; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); -- cgit v1.2.3 From 2536390da0d300b2734c721235c082498879841d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:14 -0500 Subject: kernfs: add kernfs_open_file->priv Add a private data field to be used by kernfs file operations. This generally makes sense and will be used by cgroup. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 47f5235a097a..9ca0f09757a1 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -146,6 +146,7 @@ struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; + void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; -- cgit v1.2.3 From 0c23b2259a4850494e2c53e864ea840597c6cdd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:15 -0500 Subject: kernfs: implement kernfs_node_from_dentry(), kernfs_root_from_sb() and kernfs_rename() Implement helpers to determine node from dentry and root from super_block. Also add a kernfs_rename_ns() wrapper which assumes NULL namespace. These generally make sense and will be used by cgroup. v2: Some dummy implementations for !CONFIG_SYSFS was missing. Fixed. Reported by kbuild test robot. Signed-off-by: Tejun Heo Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9ca0f09757a1..9c899040c05e 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -234,6 +234,9 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); +struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); +struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); + struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); @@ -288,6 +291,12 @@ kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } +static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) +{ return NULL; } + +static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) +{ return NULL; } + static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) @@ -388,6 +397,13 @@ static inline int kernfs_remove_by_name(struct kernfs_node *parent, return kernfs_remove_by_name_ns(parent, name, NULL); } +static inline int kernfs_rename(struct kernfs_node *kn, + struct kernfs_node *new_parent, + const char *new_name) +{ + return kernfs_rename_ns(kn, new_parent, new_name, NULL); +} + static inline struct dentry * kernfs_mount(struct file_system_type *fs_type, int flags, struct kernfs_root *root) -- cgit v1.2.3 From 3eef34ad7dc369b7183ec383908aff3da2f6e5ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2014 13:32:07 -0500 Subject: kernfs: implement kernfs_get_parent(), kernfs_name/path() and friends kernfs_node->parent and ->name are currently marked as "published" indicating that kernfs users may access them directly; however, those fields may get updated by kernfs_rename[_ns]() and unrestricted access may lead to erroneous values or oops. Protect ->parent and ->name updates with a irq-safe spinlock kernfs_rename_lock and implement the following accessors for these fields. * kernfs_name() - format the node's name into the specified buffer * kernfs_path() - format the node's path into the specified buffer * pr_cont_kernfs_name() - pr_cont a node's name (doesn't need buffer) * pr_cont_kernfs_path() - pr_cont a node's path (doesn't need buffer) * kernfs_get_parent() - pin and return a node's parent All can be called under any context. The recursive sysfs_pathname() in fs/sysfs/dir.c is replaced with kernfs_path() and sysfs_rename_dir_ns() is updated to use kernfs_get_parent() instead of dereferencing parent directly. v2: Dummy definition of kernfs_path() for !CONFIG_KERNFS was missing static inline making it cause a lot of build warnings. Add it. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9c899040c05e..8736ee86a1d6 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -91,7 +91,12 @@ struct kernfs_node { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif - /* the following two fields are published */ + /* + * Use kernfs_get_parent() and kernfs_name/path() instead of + * accessing the following two fields directly. If the node is + * never moved to a different parent, it is safe to access the + * parent directly. + */ struct kernfs_node *parent; const char *name; @@ -229,6 +234,12 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) return kn->flags & KERNFS_NS; } +int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); +char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, + size_t buflen); +void pr_cont_kernfs_name(struct kernfs_node *kn); +void pr_cont_kernfs_path(struct kernfs_node *kn); +struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); void kernfs_get(struct kernfs_node *kn); @@ -283,6 +294,19 @@ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } +static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) +{ return -ENOSYS; } + +static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, + size_t buflen) +{ return NULL; } + +static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } +static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } + +static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) +{ return NULL; } + static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) -- cgit v1.2.3 From fa4cd451cceb77e97432b91fcf50a7e4a7361e29 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2014 13:32:07 -0500 Subject: sysfs, kobject: add sysfs wrapper for kernfs_enable_ns() Currently, kobject is invoking kernfs_enable_ns() directly. This is fine now as sysfs and kernfs are enabled and disabled together. If sysfs is disabled, kernfs_enable_ns() is switched to dummy implementation too and everything is fine; however, kernfs will soon have its own config option CONFIG_KERNFS and !SYSFS && KERNFS will be possible, which can make kobject call into non-dummy kernfs_enable_ns() with NULL kernfs_node pointers leading to an oops. Introduce sysfs_enable_ns() which is a wrapper around kernfs_enable_ns() so that it can be made a noop depending only on CONFIG_SYSFS regardless of the planned CONFIG_KERNFS. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 14df05415af9..fdaa0c6fc7a2 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -244,6 +244,11 @@ void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); int __must_check sysfs_init(void); +static inline void sysfs_enable_ns(struct kernfs_node *kn) +{ + return kernfs_enable_ns(kn); +} + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) @@ -416,6 +421,10 @@ static inline int __must_check sysfs_init(void) return 0; } +static inline void sysfs_enable_ns(struct kernfs_node *kn) +{ +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From ba341d55a420ab4fdd1a53fd395fd59bd65de880 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Feb 2014 14:09:17 -0500 Subject: kernfs: add CONFIG_KERNFS As sysfs was kernfs's only user, kernfs has been piggybacking on CONFIG_SYSFS; however, kernfs is scheduled to grow a new user very soon. Introduce a separate config option CONFIG_KERNFS which is to be selected by kernfs users. Signed-off-by: Tejun Heo Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 8736ee86a1d6..649497a56a95 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -201,7 +201,7 @@ struct kernfs_ops { #endif }; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { @@ -284,7 +284,7 @@ void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); -#else /* CONFIG_SYSFS */ +#else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ @@ -379,7 +379,7 @@ static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } -#endif /* CONFIG_SYSFS */ +#endif /* CONFIG_KERNFS */ static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) -- cgit v1.2.3 From 67bad2fdb754dbef14596c0b5d28b3a12c8dfe84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:09 +0100 Subject: cpu: add generic support for CPU feature based module autoloading This patch adds support for advertising optional CPU features over udev using the modalias, and for declaring compatibility with/dependency upon such a feature in a module. The mapping between feature numbers and actual features should be provided by the architecture in a file called which exports the following functions/macros: - cpu_feature(FEAT), a preprocessor macro that maps token FEAT to a numeric index; - bool cpu_have_feature(n), returning whether this CPU has support for feature #n; - MAX_CPU_FEATURES, an upper bound for 'n' in the previous function. The feature can then be enabled by setting CONFIG_GENERIC_CPU_AUTOPROBE for the architecture. For instance, a module that registers its module init function using module_cpu_feature_match(FEAT_X, module_init_function) will be probed automatically when the CPU's support for the 'FEAT_X' feature is advertised over udev, and will only allow the module to be loaded by hand if the 'FEAT_X' feature is supported. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/linux/cpufeature.h | 60 +++++++++++++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 9 +++++++ 2 files changed, 69 insertions(+) create mode 100644 include/linux/cpufeature.h (limited to 'include/linux') diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h new file mode 100644 index 000000000000..c4d4eb8ac9fe --- /dev/null +++ b/include/linux/cpufeature.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_CPUFEATURE_H +#define __LINUX_CPUFEATURE_H + +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + +#include +#include + +/* + * Macros imported from : + * - cpu_feature(x) ordinal value of feature called 'x' + * - cpu_have_feature(u32 n) whether feature #n is available + * - MAX_CPU_FEATURES upper bound for feature ordinal values + * Optional: + * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type + * - CPU_FEATURE_TYPEVAL set of values matching the format string above + */ + +#ifndef CPU_FEATURE_TYPEFMT +#define CPU_FEATURE_TYPEFMT "%s" +#endif + +#ifndef CPU_FEATURE_TYPEVAL +#define CPU_FEATURE_TYPEVAL ELF_PLATFORM +#endif + +/* + * Use module_cpu_feature_match(feature, module_init_function) to + * declare that + * a) the module shall be probed upon discovery of CPU feature 'feature' + * (typically at boot time using udev) + * b) the module must not be loaded if CPU feature 'feature' is not present + * (not even by manual insmod). + * + * For a list of legal values for 'feature', please consult the file + * 'asm/cpufeature.h' of your favorite architecture. + */ +#define module_cpu_feature_match(x, __init) \ +static struct cpu_feature const cpu_feature_match_ ## x[] = \ + { { .feature = cpu_feature(x) }, { } }; \ +MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ + \ +static int cpu_feature_match_ ## x ## _init(void) \ +{ \ + if (!cpu_have_feature(cpu_feature(x))) \ + return -ENODEV; \ + return __init(); \ +} \ +module_init(cpu_feature_match_ ## x ## _init) + +#endif +#endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 45e921401b06..f2ac87c613a5 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -564,6 +564,15 @@ struct x86_cpu_id { #define X86_MODEL_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +/* + * Generic table type for matching CPU features. + * @feature: the bit number of the feature (0 - 65535) + */ + +struct cpu_feature { + __u16 feature; +}; + #define IPACK_ANY_FORMAT 0xff #define IPACK_ANY_ID (~0) struct ipack_device_id { -- cgit v1.2.3 From 2b9c1f03278ab7cd421f14ce24dee39091ecb064 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:10 +0100 Subject: x86: align x86 arch with generic CPU modalias handling The x86 CPU feature modalias handling existed before it was reimplemented generically. This patch aligns the x86 handling so that it (a) reuses some more code that is now generic; (b) uses the generic format for the modalias module metadata entry, i.e., it now uses 'cpu:type:x86,venVVVVfamFFFFmodMMMM:feature:,XXXX,YYYY' instead of the 'x86cpu:vendor:VVVV:family:FFFF:model:MMMM:feature:,XXXX,YYYY' that was used before. Signed-off-by: Ard Biesheuvel Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 03e235ad1bba..03e962e23eaf 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -46,13 +46,6 @@ extern ssize_t arch_cpu_release(const char *, size_t); #endif struct notifier_block; -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -extern int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env); -extern ssize_t arch_print_cpu_modalias(struct device *dev, - struct device_attribute *attr, - char *bufptr); -#endif - /* * CPU notifier priorities. */ -- cgit v1.2.3 From b7ce40cff0b9f6597f8318fd761accd92727f61f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Mar 2014 15:38:46 -0500 Subject: kernfs: cache atomic_write_len in kernfs_open_file While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement kernfs_ops->atomic_write_len") moved data copy from userland inside kernfs_get_active() and kernfs_open_file->mutex so that kernfs_ops->atomic_write_len can be accessed before copying buffer from userland; unfortunately, this could lead to locking order inversion involving mmap_sem if copy_from_user() takes a page fault. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W ------------------------------------------------------- trinity-c236/10658 is trying to acquire lock: (&of->mutex#2){+.+.+.}, at: [] kernfs_fop_mmap+0x54/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] might_fault+0x7e/0xb0 [] kernfs_fop_write+0xd8/0x190 [] vfs_write+0xe3/0x1d0 [] SyS_write+0x5d/0xa0 [] tracesys+0xdd/0xe2 -> #0 (&of->mutex#2){+.+.+.}: [] check_prev_add+0x13f/0x560 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] mutex_lock_nested+0x6a/0x510 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] vm_mmap_pgoff+0x8f/0xe0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&of->mutex#2); lock(&mm->mmap_sem); lock(&of->mutex#2); *** DEADLOCK *** 1 lock held by trinity-c236/10658: #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 stack backtrace: CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000 0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8 ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8 Call Trace: [] dump_stack+0x52/0x7f [] print_circular_bug+0x129/0x160 [] check_prev_add+0x13f/0x560 [] ? deactivate_slab+0x511/0x550 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] ? mmap_region+0x24a/0x5c0 [] lock_acquire+0x182/0x1d0 [] ? kernfs_fop_mmap+0x54/0x120 [] mutex_lock_nested+0x6a/0x510 [] ? kernfs_fop_mmap+0x54/0x120 [] ? get_parent_ip+0x11/0x50 [] ? kernfs_fop_mmap+0x54/0x120 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] ? vm_mmap_pgoff+0x6e/0xe0 [] vm_mmap_pgoff+0x8f/0xe0 [] ? __rcu_read_unlock+0x44/0xb0 [] ? dup_fd+0x3c0/0x3c0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 Fix it by caching atomic_write_len in kernfs_open_file during open so that it can be determined without accessing kernfs_ops in kernfs_fop_write(). This restores the structure of kernfs_fop_write() before 4d3773c4bb41 with updated @len determination logic. Signed-off-by: Tejun Heo Reported-by: Sasha Levin References: http://lkml.kernel.org/g/53113485.2090407@oracle.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 09669d092748..b0122dc6f96a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -158,6 +158,7 @@ struct kernfs_open_file { int event; struct list_head list; + size_t atomic_write_len; bool mmapped; const struct vm_operations_struct *vm_ops; }; -- cgit v1.2.3 From 72099304eeb316c4b00df3ae83efe4375729bd78 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Mar 2014 20:54:57 -0700 Subject: Revert "sysfs, driver-core: remove unused {sysfs|device}_schedule_callback_owner()" This reverts commit d1ba277e79889085a2faec3b68b91ce89c63f888. As reported by Stephen, this patch breaks linux-next as a ppc patch suddenly (after 2 years) started using this old api call. So revert it for now, it will go away in 3.15-rc2 when we can change the PPC call to the new api. Reported-by: Stephen Rothwell Cc: Tejun Heo Cc: Stewart Smith Cc: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 11 ++++++++++- include/linux/sysfs.h | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index fb1ba13f7665..1ff3f1697513 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -566,6 +566,12 @@ extern int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); extern void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); +extern int device_schedule_callback_owner(struct device *dev, + void (*func)(struct device *dev), struct module *owner); + +/* This is a macro to avoid include problems with THIS_MODULE */ +#define device_schedule_callback(dev, func) \ + device_schedule_callback_owner(dev, func, THIS_MODULE) /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); @@ -925,7 +931,10 @@ extern int device_online(struct device *dev); extern struct device *__root_device_register(const char *name, struct module *owner); -/* This is a macro to avoid include problems with THIS_MODULE */ +/* + * This is a macro to avoid include problems with THIS_MODULE, + * just as per what is done for device_schedule_callback() above. + */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fdaa0c6fc7a2..e0bf210ddffd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,6 +178,9 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS +int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), + void *data, struct module *owner); + int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, @@ -251,6 +254,12 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) #else /* CONFIG_SYSFS */ +static inline int sysfs_schedule_callback(struct kobject *kobj, + void (*func)(void *), void *data, struct module *owner) +{ + return -ENOSYS; +} + static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; -- cgit v1.2.3