From 40bea431274c247425e7f5970d796ff7b37a2b22 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Sep 2009 20:40:25 +0100 Subject: dm stripe: expose correct io hints Set sensible I/O hints for striped DM devices in the topology infrastructure added for 2.6.31 for userspace tools to obtain via sysfs. Add .io_hints to 'struct target_type' to allow the I/O hints portion (io_min and io_opt) of the 'struct queue_limits' to be set by each target and implement this for dm-stripe. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 655e7721580a..df7607e6dce8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -91,6 +91,9 @@ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, iterate_devices_callout_fn fn, void *data); +typedef void (*dm_io_hints_fn) (struct dm_target *ti, + struct queue_limits *limits); + /* * Returns: * 0: The target can handle the next I/O immediately. @@ -151,6 +154,7 @@ struct target_type { dm_merge_fn merge; dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; + dm_io_hints_fn io_hints; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 7ec23d50949d5062b5b749638dd9380ed75e58e5 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 4 Sep 2009 20:40:34 +0100 Subject: dm log: userspace add luid to distinguish between concurrent log instances Device-mapper userspace logs (like the clustered log) are identified by a universally unique identifier (UUID). This identifier is used to associate requests from the kernel to a specific log in userspace. The UUID must be unique everywhere, since multiple machines may use this identifier when communicating about a particular log, as is the case for cluster logs. Sometimes, device-mapper/LVM may re-use a UUID. This is the case during pvmoves, when moving from one segment of an LV to another, or when resizing a mirror, etc. In these cases, a new log is created with the same UUID and loaded in the "inactive" slot. When a device-mapper "resume" is issued, the "live" table is deactivated and the new "inactive" table becomes "live". (The "inactive" table can also be removed via a device-mapper 'clear' command.) The above two issues were colliding. More than one log was being created with the same UUID, and there was no way to distinguish between them. So, sometimes the wrong log would be swapped out during the exchange. The solution is to create a locally unique identifier, 'luid', to go along with the UUID. This new identifier is used to determine exactly which log is being referenced by the kernel when the log exchange is made. The identifier is not universally safe, but it does not need to be, since create/destroy/suspend/resume operations are bound to a specific machine; and these are the operations that make up the exchange. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- include/linux/dm-log-userspace.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h index 642e3017b51f..8a1f972c0fe9 100644 --- a/include/linux/dm-log-userspace.h +++ b/include/linux/dm-log-userspace.h @@ -371,7 +371,18 @@ (DM_ULOG_REQUEST_MASK & (request_type)) struct dm_ulog_request { - char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */ + /* + * The local unique identifier (luid) and the universally unique + * identifier (uuid) are used to tie a request to a specific + * mirror log. A single machine log could probably make due with + * just the 'luid', but a cluster-aware log must use the 'uuid' and + * the 'luid'. The uuid is what is required for node to node + * communication concerning a particular log, but the 'luid' helps + * differentiate between logs that are being swapped and have the + * same 'uuid'. (Think "live" and "inactive" device-mapper tables.) + */ + uint64_t luid; + char uuid[DM_UUID_LEN]; char padding[7]; /* Padding because DM_UUID_LEN = 129 */ int32_t error; /* Used to report back processing errors */ -- cgit v1.2.3 From 4e49627b9bc29a14b393c480e8c979e3bc922ef7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Sep 2009 11:17:06 -0700 Subject: workqueues: introduce __cancel_delayed_work() cancel_delayed_work() has to use del_timer_sync() to guarantee the timer function is not running after return. But most users doesn't actually need this, and del_timer_sync() has problems: it is not useable from interrupt, and it depends on every lock which could be taken from irq. Introduce __cancel_delayed_work() which calls del_timer() instead. The immediate reason for this patch is http://bugzilla.kernel.org/show_bug.cgi?id=13757 but hopefully this helper makes sense anyway. As for 13757 bug, actually we need requeue_delayed_work(), but its semantics are not yet clear. Merge this patch early to resolves cross-tree interdependencies between input and infiniband. Signed-off-by: Oleg Nesterov Cc: Dmitry Torokhov Cc: Roland Dreier Cc: Stefan Richter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 13e1adf55c4c..6273fa97b527 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } +/* + * Like above, but uses del_timer() instead of del_timer_sync(). This means, + * if it returns 0 the timer function may be running and the queueing is in + * progress. + */ +static inline int __cancel_delayed_work(struct delayed_work *work) +{ + int ret; + + ret = del_timer(&work->timer); + if (ret) + work_clear_pending(&work->work); + return ret; +} + extern int cancel_delayed_work_sync(struct delayed_work *work); /* Obsolete. use cancel_delayed_work_sync() */ -- cgit v1.2.3 From a2a8474c3fff88d8dd52d05cb450563fb26fd26c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Sep 2009 11:17:13 -0700 Subject: exec: do not sleep in TASK_TRACED under ->cred_guard_mutex Tom Horsley reports that his debugger hangs when it tries to read /proc/pid_of_tracee/maps, this happens since "mm_for_maps: take ->cred_guard_mutex to fix the race with exec" 04b836cbf19e885f8366bccb2e4b0474346c02d commit in 2.6.31. But the root of the problem lies in the fact that do_execve() path calls tracehook_report_exec() which can stop if the tracer sets PT_TRACE_EXEC. The tracee must not sleep in TASK_TRACED holding this mutex. Even if we remove ->cred_guard_mutex from mm_for_maps() and proc_pid_attr_write(), another task doing PTRACE_ATTACH should not hang until it is killed or the tracee resumes. With this patch do_execve() does not use ->cred_guard_mutex directly and we do not hold it throughout, instead: - introduce prepare_bprm_creds() helper, it locks the mutex and calls prepare_exec_creds() to initialize bprm->cred. - install_exec_creds() drops the mutex after commit_creds(), and thus before tracehook_report_exec()->ptrace_stop(). or, if exec fails, free_bprm() drops this mutex when bprm->cred != NULL which indicates install_exec_creds() was not called. Reported-by: Tom Horsley Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Roland McGrath Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 61ee18c1bdb4..2046b5b8af48 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -117,6 +117,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); extern int set_binfmt(struct linux_binfmt *new); -- cgit v1.2.3