From 85893120699f8bae8caa12a8ee18ab5fceac978e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 27 Oct 2010 15:34:43 -0700 Subject: delayacct: align to 8 byte boundary on 64-bit systems prepare_reply() sets up an skb for the response. The payload contains: +--------------------------------+ | genlmsghdr - 4 bytes | +--------------------------------+ | NLA header - 4 bytes | /* Aggregate header */ +-+------------------------------+ | | NLA header - 4 bytes | /* PID header */ | +------------------------------+ | | pid/tgid - 4 bytes | | +------------------------------+ | | NLA header - 4 bytes | /* stats header */ | + -----------------------------+ <- oops. aligned on 4 byte boundary | | struct taskstats - 328 bytes | +-+------------------------------+ The start of the taskstats struct must be 8 byte aligned on IA64 (and other systems with 8 byte alignment rules for 64-bit types) or runtime alignment warnings will be issued. This patch pads the pid/tgid field out to sizeof(long), which forces the alignment of taskstats. The getdelays userspace code is ok with this since it assumes 32-bit pid/tgid and then honors that header's length field. An array is used to avoid exposing kernel memory contents to userspace in the response. Signed-off-by: Jeff Mahoney Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..5a651aa63d61 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -360,6 +360,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) struct nlattr *na, *ret; int aggr; + /* If we don't pad, we end up with alignment on a 4 byte boundary. + * This causes lots of runtime warnings on systems requiring 8 byte + * alignment */ + u32 pids[2] = { pid, 0 }; + int pid_size = ALIGN(sizeof(pid), sizeof(long)); + aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; @@ -367,7 +373,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, sizeof(pid), &pid) < 0) + if (nla_put(skb, type, pid_size, pids) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) -- cgit v1.2.3 From 9323312592cca636d7c2580dc85fa4846efa86a2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: taskstats: separate taskstats commands Move each taskstats command into a single function. This makes the code more readable and makes it easier to add new commands. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 118 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 40 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 5a651aa63d61..9970cae04f15 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -430,39 +430,46 @@ err: return rc; } -static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +static int cmd_attr_register_cpumask(struct genl_info *info) { - int rc; - struct sk_buff *rep_skb; - struct taskstats *stats; - size_t size; cpumask_var_t mask; + int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, REGISTER); - goto free_return_rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, REGISTER); +out: + free_cpumask_var(mask); + return rc; +} +static int cmd_attr_deregister_cpumask(struct genl_info *info) +{ + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); -free_return_rc: - free_cpumask_var(mask); - return rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, DEREGISTER); +out: free_cpumask_var(mask); + return rc; +} + +static int cmd_attr_pid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 pid; + int rc; - /* - * Size includes space for nested attributes - */ size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); @@ -471,33 +478,64 @@ free_return_rc: return rc; rc = -EINVAL; - if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { - u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); - if (!stats) - goto err; - - rc = fill_pid(pid, NULL, stats); - if (rc < 0) - goto err; - } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { - u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); - if (!stats) - goto err; - - rc = fill_tgid(tgid, NULL, stats); - if (rc < 0) - goto err; - } else + pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); + if (!stats) + goto err; + + rc = fill_pid(pid, NULL, stats); + if (rc < 0) + goto err; + return send_reply(rep_skb, info); +err: + nlmsg_free(rep_skb); + return rc; +} + +static int cmd_attr_tgid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 tgid; + int rc; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + + rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); + if (rc < 0) + return rc; + + rc = -EINVAL; + tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); + if (!stats) goto err; + rc = fill_tgid(tgid, NULL, stats); + if (rc < 0) + goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) + return cmd_attr_register_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) + return cmd_attr_deregister_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) + return cmd_attr_pid(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) + return cmd_attr_tgid(info); + else + return -EINVAL; +} + static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; -- cgit v1.2.3 From 3d9e0cf1fe007b88db55d43dfdb6839e1a029ca5 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: taskstats: split fill_pid function Separate the finding of a task_struct by pid or tgid from filling the taskstats data. This makes the code more readable. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9970cae04f15..c8231fb15708 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } -static int fill_pid(pid_t pid, struct task_struct *tsk, - struct taskstats *stats) +static void fill_stats(struct task_struct *tsk, struct taskstats *stats) { - int rc = 0; - - if (!tsk) { - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return -ESRCH; - } else - get_task_struct(tsk); - memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to @@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); +} - /* Define err: label here if needed */ - put_task_struct(tsk); - return rc; +static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) +{ + struct task_struct *tsk; + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return -ESRCH; + fill_stats(tsk, stats); + put_task_struct(tsk); + return 0; } -static int fill_tgid(pid_t tgid, struct task_struct *first, - struct taskstats *stats) +static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) { - struct task_struct *tsk; + struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; @@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, * leaders who are already counted with the dead tasks */ rcu_read_lock(); - if (!first) - first = find_task_by_vpid(tgid); + first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; @@ -268,7 +263,6 @@ out: return rc; } - static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; @@ -483,7 +477,7 @@ static int cmd_attr_pid(struct genl_info *info) if (!stats) goto err; - rc = fill_pid(pid, NULL, stats); + rc = fill_stats_for_pid(pid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -513,7 +507,7 @@ static int cmd_attr_tgid(struct genl_info *info) if (!stats) goto err; - rc = fill_tgid(tgid, NULL, stats); + rc = fill_stats_for_tgid(tgid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -599,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!stats) goto err; - rc = fill_pid(-1, tsk, stats); - if (rc < 0) - goto err; + fill_stats(tsk, stats); /* * Doesn't matter if tsk is the leader or the last group member leaving -- cgit v1.2.3 From 4be2c95d1f7706ca0e74499f2bd118e1cee19669 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 21 Dec 2010 17:24:30 -0800 Subject: taskstats: pad taskstats netlink response for aligment issues on ia64 The taskstats structure is internally aligned on 8 byte boundaries but the layout of the aggregrate reply, with two NLA headers and the pid (each 4 bytes), actually force the entire structure to be unaligned. This causes the kernel to issue unaligned access warnings on some architectures like ia64. Unfortunately, some software out there doesn't properly unroll the NLA packet and assumes that the start of the taskstats structure will always be 20 bytes from the start of the netlink payload. Aligning the start of the taskstats structure breaks this software, which we don't want. So, for now the alignment only happens on architectures that require it and those users will have to update to fixed versions of those packages. Space is reserved in the packet only when needed. This ifdef should be removed in several years e.g. 2012 once we can be confident that fixed versions are installed on most systems. We add the padding before the aggregate since the aggregate is already a defined type. Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems") previously addressed the alignment issues by padding out the pid field. This was supposed to be a compatible change but the circumstances described above mean that it wasn't. This patch backs out that change, since it was a hack, and introduces a new NULL attribute type to provide the padding. Padding the response with 4 bytes avoids allocating an aligned taskstats structure and copying it back. Since the structure weighs in at 328 bytes, it's too big to do it on the stack. Signed-off-by: Jeff Mahoney Reported-by: Brian Rogers Cc: Jeff Mahoney Cc: Guillaume Chazarain Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 57 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 13 deletions(-) (limited to 'kernel/taskstats.c') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c8231fb15708..3308fd7f1b52 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask) return ret; } +#ifdef CONFIG_IA64 +#define TASKSTATS_NEEDS_PADDING 1 +#endif + static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; - /* If we don't pad, we end up with alignment on a 4 byte boundary. - * This causes lots of runtime warnings on systems requiring 8 byte - * alignment */ - u32 pids[2] = { pid, 0 }; - int pid_size = ALIGN(sizeof(pid), sizeof(long)); - aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; + /* + * The taskstats structure is internally aligned on 8 byte + * boundaries but the layout of the aggregrate reply, with + * two NLA headers and the pid (each 4 bytes), actually + * force the entire structure to be unaligned. This causes + * the kernel to issue unaligned access warnings on some + * architectures like ia64. Unfortunately, some software out there + * doesn't properly unroll the NLA packet and assumes that the start + * of the taskstats structure will always be 20 bytes from the start + * of the netlink payload. Aligning the start of the taskstats + * structure breaks this software, which we don't want. So, for now + * the alignment only happens on architectures that require it + * and those users will have to update to fixed versions of those + * packages. Space is reserved in the packet only when needed. + * This ifdef should be removed in several years e.g. 2012 once + * we can be confident that fixed versions are installed on most + * systems. We add the padding before the aggregate since the + * aggregate is already a defined type. + */ +#ifdef TASKSTATS_NEEDS_PADDING + if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) + goto err; +#endif na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, pid_size, pids) < 0) + + if (nla_put(skb, type, sizeof(pid), &pid) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) @@ -456,6 +478,18 @@ out: return rc; } +static size_t taskstats_packet_size(void) +{ + size_t size; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +#ifdef TASKSTATS_NEEDS_PADDING + size += nla_total_size(0); /* Padding for alignment */ +#endif + return size; +} + static int cmd_attr_pid(struct genl_info *info) { struct taskstats *stats; @@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info) u32 pid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info) u32 tgid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) /* * Size includes space for nested attributes */ - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { -- cgit v1.2.3