Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 2114

kernel-2.6.18-238.el5.src.rpm

From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 12 Jan 2010 14:10:26 -0500
Subject: [misc] taskstats: new structure/cmd to avoid KABI break
Message-id: <1263305427-17138-4-git-send-email-jolsa@redhat.com>
Patchwork-id: 22434
O-Subject: [PATCH v2 RHEL5.6 3/4] BZ 516961 taskstats - new structure/command to
	avoid KABI breakage
Bugzilla: 516961
RH-Acked-by: Jerome Marchand <jmarchan@redhat.com>
RH-Acked-by: Anton Arapov <Anton@redhat.com>

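This change introduces a new command attribute, TASKSTATS_CMD_ATTR_PID_V4,
which returns the version 4 taskstats structure (struct taskstats_v4).
The existing struct taskstats goes back to its version 1 layout so that
the KABI is not broken.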

wbr,
jirka

diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 6825353..76916a3 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -30,12 +30,72 @@
  *	c) add new fields after version comment; maintain 64-bit alignment
  */
 
+#define TASKSTATS_VERSION	1
 
-#define TASKSTATS_VERSION	4
+struct taskstats {
+	/* KABI-frozen v1 layout; later fields live in struct taskstats_v4 */
+	/* Version 1 */
+	__u16	version;
+	__u16	padding[3];	/* Userspace should not interpret the padding
+				 * field, which can be replaced by useful
+				 * fields if struct taskstats is extended.
+				 */
+
+	/* Delay accounting fields start
+	 *
+	 * All values, until the comment "Delay accounting fields end", are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count is incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count and delay_total are NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* The following four fields are updated under task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
+};
+
+#define TASKSTATS_VERSION_V4	4
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
-struct taskstats {
+struct taskstats_v4 {
 
 	/* The version number of this struct. This field is always set to
 	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
@@ -174,6 +234,7 @@ enum {
 	TASKSTATS_TYPE_STATS,		/* taskstats structure */
 	TASKSTATS_TYPE_AGGR_PID,	/* contains pid + stats */
 	TASKSTATS_TYPE_AGGR_TGID,	/* contains tgid + stats */
+	TASKSTATS_TYPE_STATS_V4,	/* taskstats structure v4 */
 	__TASKSTATS_TYPE_MAX,
 };
 
@@ -185,6 +246,7 @@ enum {
 	TASKSTATS_CMD_ATTR_TGID,
 	TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
 	TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+	TASKSTATS_CMD_ATTR_PID_V4,
 	__TASKSTATS_CMD_ATTR_MAX,
 };
 
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 7e50ac7..4ac3519 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -10,18 +10,18 @@
 #include <linux/taskstats.h>
 
 #ifdef CONFIG_TASKSTATS
-extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk);
+extern void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk);
 #else
-static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+static inline void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASKSTATS */
 
 #ifdef CONFIG_TASK_XACCT
-extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
+extern void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
-static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
+static inline void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 305865b..091dd06 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -200,13 +200,54 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
 	 */
 
 	delayacct_add_tsk(stats, tsk);
+	stats->version = TASKSTATS_VERSION;
+
+	/* Define err: label here if needed */
+	put_task_struct(tsk);
+	return rc;
+
+}
+
+static int fill_pid_v4(pid_t pid, struct taskstats_v4 *stats_v4)
+{
+	int rc = 0;
+	struct task_struct *tsk;
+	struct taskstats stats;	/* scratch v1 struct for delayacct */
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid(pid);
+	if (!tsk) {
+		read_unlock(&tasklist_lock);
+		return -ESRCH;
+	}
+	get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * delayacct_add_tsk() is handed a v1 struct taskstats, so gather
+	 * the delay figures in a scratch copy and transfer them by field.
+	 * Zero the scratch copy first: its fields are copied out
+	 * unconditionally below, so none may hold stale stack contents.
+	 */
+	memset(&stats, 0, sizeof(stats));
+	delayacct_add_tsk(&stats, tsk);
+
+	/* copy the v1 delay accounting fields into the v4 struct */
+	stats_v4->cpu_count             = stats.cpu_count;
+	stats_v4->cpu_delay_total       = stats.cpu_delay_total;
+	stats_v4->blkio_count           = stats.blkio_count;
+	stats_v4->blkio_delay_total     = stats.blkio_delay_total;
+	stats_v4->swapin_count          = stats.swapin_count;
+	stats_v4->swapin_delay_total    = stats.swapin_delay_total;
+	stats_v4->cpu_run_real_total    = stats.cpu_run_real_total;
+	stats_v4->cpu_run_virtual_total = stats.cpu_run_virtual_total;
 
 	/* fill in basic acct fields */
-	stats->version = TASKSTATS_VERSION;
-	bacct_add_tsk(stats, tsk);
+	stats_v4->version = TASKSTATS_VERSION_V4;
+	bacct_add_tsk(stats_v4, tsk);
 
 	/* fill in extended acct fields */
-	xacct_add_tsk(stats, tsk);
+	xacct_add_tsk(stats_v4, tsk);
 
 	/* Define err: label here if needed */
 	put_task_struct(tsk);
@@ -384,10 +425,14 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4])
+		size = nla_total_size(sizeof(struct taskstats_v4));
+	else
+		size = nla_total_size(sizeof(struct taskstats));
+	size += nla_total_size(sizeof(u32)) + nla_total_size(0);
+	/* zero unconditionally: non-v4 reply branches still send stats */
+	memset(&stats, 0, sizeof(stats));
 
-	memset(&stats, 0, sizeof(stats));
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
 	if (rc < 0)
 		return rc;
@@ -402,6 +447,19 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
 		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
 				stats);
+	} else if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4]) {
+		struct taskstats_v4 stats_v4;
+		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID_V4]);
+
+		memset(&stats_v4, 0, sizeof(stats_v4));
+		rc = fill_pid_v4(pid, &stats_v4);
+		if (rc < 0)
+			goto err;
+		/* same nesting as the v1 PID reply, but with the v4 payload */
+		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
+		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
+		NLA_PUT_TYPE(rep_skb, struct taskstats_v4, TASKSTATS_TYPE_STATS_V4,
+				stats_v4);
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
 		rc = fill_tgid(tgid, NULL, &stats);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 89d97ca..70d998d 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -27,7 +27,7 @@
 /*
  * fill in basic accounting fields
  */
-void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+void bacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *tsk)
 {
 	struct timespec uptime, ts;
 	s64 ac_etime;
@@ -75,7 +75,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 /*
  * fill in extended accounting fields
  */
-void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
+void xacct_add_tsk(struct taskstats_v4 *stats, struct task_struct *p)
 {
 	/* convert pages-jiffies to Mbyte-usec */
 	stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;