From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 12 Jan 2010 14:10:26 -0500
Subject: [misc] taskstats: new structure/cmd to avoid KABI break
Message-id: <1263305427-17138-4-git-send-email-jolsa@redhat.com>
Patchwork-id: 22434
O-Subject: [PATCH v2 RHEL5.6 3/4] BZ 516961 taskstats - new structure/command to avoid KABI breakage
Bugzilla: 516961
RH-Acked-by: Jerome Marchand <jmarchan@redhat.com>
RH-Acked-by: Anton Arapov <Anton@redhat.com>

This change introduces new command TASKSTATS_CMD_ATTR_PID_V4,
which will return the version 4 taskstats structure.

wbr,
jirka

[review fixups: fill_pid_v4() zeroes its on-stack v1 scratch structure
 before it is filled and copied out; taskstats_user_cmd() clears the v1
 reply buffer unconditionally again; locals in the V4 branch are declared
 before the first statement]

diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 6825353..76916a3 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -30,12 +30,72 @@
  *	c) add new fields after version comment; maintain 64-bit alignment
  */
 
+#define TASKSTATS_VERSION	1
 
-#define TASKSTATS_VERSION	4
+struct taskstats {
+
+	/* Version 1 */
+	__u16	version;
+	__u16	padding[3];	/* Userspace should not interpret the padding
+				 * field which can be replaced by useful
+				 * fields if struct taskstats is extended.
+				 */
+
+	/* Delay accounting fields start
+	 *
+	 * All values, until comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
+};
+
+#define TASKSTATS_VERSION_V4	4
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
-struct taskstats {
+struct taskstats_v4 {
 
 	/* The version number of this struct. This field is always set to
 	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
@@ -174,6 +234,7 @@ enum {
 	TASKSTATS_TYPE_STATS,		/* taskstats structure */
 	TASKSTATS_TYPE_AGGR_PID,	/* contains pid + stats */
 	TASKSTATS_TYPE_AGGR_TGID,	/* contains tgid + stats */
+	TASKSTATS_TYPE_STATS_V4,	/* taskstats structure v4 */
 	__TASKSTATS_TYPE_MAX,
 };
 
@@ -185,6 +246,7 @@ enum {
 	TASKSTATS_CMD_ATTR_TGID,
 	TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
 	TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+	TASKSTATS_CMD_ATTR_PID_V4,
 	__TASKSTATS_CMD_ATTR_MAX,
 };
 
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 7e50ac7..4ac3519 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -10,18 +10,18 @@
 #include <linux/taskstats.h>
 
 #ifdef CONFIG_TASKSTATS
-extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk);
+extern void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk);
 #else
-static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+static inline void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASKSTATS */
 
 #ifdef CONFIG_TASK_XACCT
-extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
+extern void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
-static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
+static inline void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 305865b..091dd06 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -200,13 +200,61 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
 	 */
 
 	delayacct_add_tsk(stats, tsk);
+	stats->version = TASKSTATS_VERSION;
+
+	/* Define err: label here if needed */
+	put_task_struct(tsk);
+	return rc;
+
+}
+
+/*
+ * fill_pid_v4 - collect statistics for one task in the version 4 layout
+ *
+ * Returns 0 on success or -ESRCH if no task with the given pid is found.
+ */
+static int fill_pid_v4(pid_t pid, struct taskstats_v4 *stats_v4)
+{
+	int rc = 0;
+	struct task_struct *tsk;
+	struct taskstats stats;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid(pid);
+	if (!tsk) {
+		read_unlock(&tasklist_lock);
+		return -ESRCH;
+	}
+	get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Each accounting subsystem adds calls to its functions to
+	 * fill in relevant parts of struct taskstats as follows
+	 *
+	 *	per-task-foo(stats, tsk);
+	 */
+
+	/* stack scratch buffer is copied out below: start from zero */
+	memset(&stats, 0, sizeof(stats));
+	delayacct_add_tsk(&stats, tsk);
+
+	/* copy v1 delay accounting statistics to the v4 struct */
+	stats_v4->cpu_count = stats.cpu_count;
+	stats_v4->cpu_delay_total = stats.cpu_delay_total;
+	stats_v4->blkio_count = stats.blkio_count;
+	stats_v4->blkio_delay_total = stats.blkio_delay_total;
+	stats_v4->swapin_count = stats.swapin_count;
+	stats_v4->swapin_delay_total = stats.swapin_delay_total;
+	stats_v4->cpu_run_real_total = stats.cpu_run_real_total;
+	stats_v4->cpu_run_virtual_total = stats.cpu_run_virtual_total;
 
 	/* fill in basic acct fields */
-	stats->version = TASKSTATS_VERSION;
-	bacct_add_tsk(stats, tsk);
+	stats_v4->version = TASKSTATS_VERSION_V4;
+	bacct_add_tsk(stats_v4, tsk);
 
 	/* fill in extended acct fields */
-	xacct_add_tsk(stats, tsk);
+	xacct_add_tsk(stats_v4, tsk);
 
 	/* Define err: label here if needed */
 	put_task_struct(tsk);
@@ -384,10 +432,14 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4])
+		size = nla_total_size(sizeof(struct taskstats_v4));
+	else
+		size = nla_total_size(sizeof(struct taskstats));
+	size += nla_total_size(sizeof(u32)) + nla_total_size(0);
 
+	/* always clear stats: other attrs may accompany ATTR_PID_V4 */
 	memset(&stats, 0, sizeof(stats));
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
 	if (rc < 0)
 		return rc;
@@ -402,6 +454,19 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
 		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
 				stats);
+	} else if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4]) {
+		struct taskstats_v4 stats_v4;
+		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID_V4]);
+
+		memset(&stats_v4, 0, sizeof(stats_v4));
+		rc = fill_pid_v4(pid, &stats_v4);
+		if (rc < 0)
+			goto err;
+
+		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
+		NLA_PUT_TYPE(rep_skb, struct taskstats_v4, TASKSTATS_TYPE_STATS_V4,
+				stats_v4);
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
 		rc = fill_tgid(tgid, NULL, &stats);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 89d97ca..70d998d 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -27,7 +27,7 @@
 /*
  * fill in basic accounting fields
  */
-void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk)
 {
 	struct timespec uptime, ts;
 	s64 ac_etime;
@@ -75,7 +75,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 /*
  * fill in extended accounting fields
  */
-void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
+void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p)
 {
 	/* convert pages-jiffies to Mbyte-usec */
 	stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;