diff options
| author | Shailabh Nagar <nagar@watson.ibm.com> | 2006-07-14 00:24:41 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-14 21:53:56 -0700 | 
| commit | 6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 (patch) | |
| tree | 0f349f4e6c28cc5d11b7083273543a294c437216 | |
| parent | c757249af152c59fd74b85e52e8c090acb33d9c0 (diff) | |
[PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface
Usage of taskstats interface by delay accounting.
Signed-off-by: Shailabh Nagar <nagar@us.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | include/linux/delayacct.h | 15 | ||||
| -rw-r--r-- | include/linux/sched.h | 1 | ||||
| -rw-r--r-- | include/linux/taskstats.h | 55 | ||||
| -rw-r--r-- | include/linux/taskstats_kern.h | 1 | ||||
| -rw-r--r-- | init/Kconfig | 1 | ||||
| -rw-r--r-- | kernel/delayacct.c | 62 | ||||
| -rw-r--r-- | kernel/taskstats.c | 16 | 
7 files changed, 144 insertions, 7 deletions
| diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 0ecbf9aad8e1..d955078a1441 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,6 +18,7 @@  #define _LINUX_DELAYACCT_H  #include <linux/sched.h> +#include <linux/taskstats_kern.h>  /*   * Per-task flags relevant to delay accounting @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *);  extern void __delayacct_tsk_exit(struct task_struct *);  extern void __delayacct_blkio_start(void);  extern void __delayacct_blkio_end(void); +extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);  static inline void delayacct_set_flag(int flag)  { @@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void)  		__delayacct_blkio_end();  } +static inline int delayacct_add_tsk(struct taskstats *d, +					struct task_struct *tsk) +{ +	if (likely(!delayacct_on)) +		return -EINVAL; +	if (!tsk->delays) +		return 0; +	return __delayacct_add_tsk(d, tsk); +} +  #else  static inline void delayacct_set_flag(int flag)  {} @@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void)  {}  static inline void delayacct_blkio_end(void)  {} +static inline int delayacct_add_tsk(struct taskstats *d, +					struct task_struct *tsk) +{ return 0; }  #endif /* CONFIG_TASK_DELAY_ACCT */  #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index f751062d89a2..3c5610ca0c92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -990,6 +990,7 @@ struct task_struct {  	 */  	struct pipe_inode_info *splice_pipe;  #ifdef	CONFIG_TASK_DELAY_ACCT +	spinlock_t delays_lock;  	struct task_delay_info *delays;  #endif  }; diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 51f62759bea9..c6aeca32348e 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -34,7 +34,60 @@  struct taskstats {  	/* Version 1 */ -	__u64	version; +	__u16	version; +	__u16	padding[3];	/* Userspace should not interpret the padding +				 * 
field which can be replaced by useful +				 * fields if struct taskstats is extended. +				 */ + +	/* Delay accounting fields start +	 * +	 * All values, until comment "Delay accounting fields end" are +	 * available only if delay accounting is enabled, even though the last +	 * few fields are not delays +	 * +	 * xxx_count is the number of delay values recorded +	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds +	 * +	 * xxx_delay_total wraps around to zero on overflow +	 * xxx_count incremented regardless of overflow +	 */ + +	/* Delay waiting for cpu, while runnable +	 * count, delay_total NOT updated atomically +	 */ +	__u64	cpu_count; +	__u64	cpu_delay_total; + +	/* Following four fields atomically updated using task->delays->lock */ + +	/* Delay waiting for synchronous block I/O to complete +	 * does not account for delays in I/O submission +	 */ +	__u64	blkio_count; +	__u64	blkio_delay_total; + +	/* Delay waiting for page fault I/O (swap in only) */ +	__u64	swapin_count; +	__u64	swapin_delay_total; + +	/* cpu "wall-clock" running time +	 * On some architectures, value will adjust for cpu time stolen +	 * from the kernel in involuntary waits due to virtualization. +	 * Value is cumulative, in nanoseconds, without a corresponding count +	 * and wraps around to zero silently on overflow +	 */ +	__u64	cpu_run_real_total; + +	/* cpu "virtual" running time +	 * Uses time intervals seen by the kernel i.e. no adjustment +	 * for kernel's involuntary waits due to virtualization. 
+	 * Value is cumulative, in nanoseconds, without a corresponding count +	 * and wraps around to zero silently on overflow +	 */ +	__u64	cpu_run_virtual_total; +	/* Delay accounting fields end */ +	/* version 1 ends here */  }; diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index bd0ecb969c26..fc9da2e26443 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -17,6 +17,7 @@ enum {  #ifdef CONFIG_TASKSTATS  extern kmem_cache_t *taskstats_cache; +extern struct mutex taskstats_exit_mutex;  static inline void taskstats_exit_alloc(struct taskstats **ptidstats,  					struct taskstats **ptgidstats) diff --git a/init/Kconfig b/init/Kconfig index 56a7093b4e4c..a099fc6526d9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -173,6 +173,7 @@ config TASKSTATS  config TASK_DELAY_ACCT  	bool "Enable per-task delay accounting (EXPERIMENTAL)" +	depends on TASKSTATS  	help  	  Collect information on time spent by a task waiting for system  	  resources like cpu, synchronous block I/O completion and swapping diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3546b0800f9f..1be274a462ca 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -41,6 +41,10 @@ void delayacct_init(void)  void __delayacct_tsk_init(struct task_struct *tsk)  { +	spin_lock_init(&tsk->delays_lock); +	/* No need to acquire tsk->delays_lock for allocation here unless +	   __delayacct_tsk_init called after tsk is attached to tasklist +	*/  	tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);  	if (tsk->delays)  		spin_lock_init(&tsk->delays->lock); @@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk)  void __delayacct_tsk_exit(struct task_struct *tsk)  { -	kmem_cache_free(delayacct_cache, tsk->delays); +	struct task_delay_info *delays = tsk->delays; +	spin_lock(&tsk->delays_lock);  	tsk->delays = NULL; +	spin_unlock(&tsk->delays_lock); +	kmem_cache_free(delayacct_cache, delays);  }  /* @@ -104,3 +111,56 @@ void 
__delayacct_blkio_end(void)  			&current->delays->blkio_delay,  			&current->delays->blkio_count);  } + +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +{ +	s64 tmp; +	struct timespec ts; +	unsigned long t1,t2,t3; + +	spin_lock(&tsk->delays_lock); + +	/* Though tsk->delays accessed later, early exit avoids +	 * unnecessary returning of other data +	 */ +	if (!tsk->delays) +		goto done; + +	tmp = (s64)d->cpu_run_real_total; +	cputime_to_timespec(tsk->utime + tsk->stime, &ts); +	tmp += timespec_to_ns(&ts); +	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; + +	/* +	 * No locking available for sched_info (and too expensive to add one) +	 * Mitigate by taking snapshot of values +	 */ +	t1 = tsk->sched_info.pcnt; +	t2 = tsk->sched_info.run_delay; +	t3 = tsk->sched_info.cpu_time; + +	d->cpu_count += t1; + +	jiffies_to_timespec(t2, &ts); +	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); +	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; + +	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; +	d->cpu_run_virtual_total = +		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp; + +	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ + +	spin_lock(&tsk->delays->lock); +	tmp = d->blkio_delay_total + tsk->delays->blkio_delay; +	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; +	tmp = d->swapin_delay_total + tsk->delays->swapin_delay; +	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 
0 : tmp; +	d->blkio_count += tsk->delays->blkio_count; +	d->swapin_count += tsk->delays->swapin_count; +	spin_unlock(&tsk->delays->lock); + +done: +	spin_unlock(&tsk->delays_lock); +	return 0; +} diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 82ec9137d908..ea9506de3b85 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -18,13 +18,13 @@  #include <linux/kernel.h>  #include <linux/taskstats_kern.h> +#include <linux/delayacct.h>  #include <net/genetlink.h>  #include <asm/atomic.h>  static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };  static int family_registered;  kmem_cache_t *taskstats_cache; -static DEFINE_MUTEX(taskstats_exit_mutex);  static struct genl_family family = {  	.id		= GENL_ID_GENERATE, @@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,  	 *		goto err;  	 */ -err: +	rc = delayacct_add_tsk(stats, tsk); +	stats->version = TASKSTATS_VERSION; + +	/* Define err: label here if needed */  	put_task_struct(tsk);  	return rc; @@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,  		 *		break;  		 */ +		rc = delayacct_add_tsk(stats, tsk); +		if (rc) +			break; +  	} while_each_thread(first, tsk);  	read_unlock(&tasklist_lock); +	stats->version = TASKSTATS_VERSION; +  	/*  	 * Accounting subsytems can also add calls here if they don't @@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,  	if (!family_registered || !tidstats)  		return; -	mutex_lock(&taskstats_exit_mutex); -  	is_thread_group = !thread_group_empty(tsk);  	rc = 0; @@ -292,7 +299,6 @@ nla_put_failure:  err_skb:  	nlmsg_free(rep_skb);  ret: -	mutex_unlock(&taskstats_exit_mutex);  	return;  } | 
