Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > eab357269fb8735c5e1a2938e6c77cae > files > 3350

kernel-2.6.18-164.10.1.el5.src.rpm

From: John Villalovos <jvillalo@redhat.com>
Date: Wed, 16 Sep 2009 13:55:26 -0400
Subject: [x86] oprofile: support arch perfmon
Message-id: 20090916175525.GC5238@linuxjohn.usersys.redhat.com
O-Subject: [RHEL 5.5 BZ523479 Patch 3/3] Support arch perfmon in oprofile
Bugzilla: 523479
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Markus Armbruster <armbru@redhat.com>

[RHEL 5.5 BZ523479 Patch 3/3] Support arch perfmon in oprofile
https://bugzilla.redhat.com/show_bug.cgi?id=523479

This third patch contains the arch perfmon patches and associated fixes.

commit 6c4b5c2adfd6fd6cab9f783204c9654ad1790697
Author: John L. Villalovos <jvillalo@redhat.com>
Date:   Tue Sep 15 13:53:09 2009 -0400

    Backport of the following commits:
        Upstream commit 86975101e46ec93be972d8f46715aa6273102545
        Upstream Author: stephane eranian <eranian@googlemail.com>
        Upstream Date:   Fri Mar 7 13:05:27 2008 -0800

    	x86: add cpu_has_arch_perfmon

    	adds cpu_has_arch_perfmon to test presence of architectural perfmon on
    	Intel x86 processor

        Upstream commit 5d4488027d9cf3161c71566dfabb116bf69ab4d9
        Upstream Author: Andi Kleen <ak@linux.intel.com>
        Upstream Date:   Mon Aug 18 14:49:47 2008 +0200

    	oprofile: drop const in num counters field

    	allow to modify it at runtime

        Upstream commit b99170288421c79f0c2efa8b33e26e65f4bb7fb8
        Upstream Author: Andi Kleen <ak@linux.intel.com>
        Upstream Date:   Mon Aug 18 14:50:31 2008 +0200

    	oprofile: Implement Intel architectural perfmon support

    	Newer Intel CPUs (Core1+) have support for architectural
    	events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.

    	The advantage of this is that it can be done without knowing about
    	the specific CPU, because the CPU describes by itself what
    	performance events are supported. This is only a fallback
    	because only a limited set of 6 events are supported.
    	This allows profiling on Nehalem and on Atom systems
    	(the latter not tested)

    	This patch implements support for that in oprofile's Intel
    	Family 6 profiling module. It also has the advantage of supporting
    	an arbitrary number of events now as reported by the CPU.
    	Also allow arbitrary counter widths >32bit while we're at it.

    	Requires a patched oprofile userland to support the new
    	architecture.

    	v2: update for latest oprofile tree
    	    remove force_arch_perfmon

        Upstream commit 59512900baab03c5629f2ff5efad1d5d4e682ece
        Upstream Author: Andi Kleen <ak@linux.intel.com>
        Upstream Date:   Mon Sep 29 22:23:33 2008 +0200

    	oprofile: discover counters for op ppro too

    	Discover number of counters for all family 6 models even when not
    	in arch perfmon mode.

        Upstream commit 7c64ade53a6f977d73f16243865c42ceae999aea
        Upstream Author: Andi Kleen <andi@firstfloor.org>
        Upstream Date:   Fri Nov 7 14:02:49 2008 +0100

    	oprofile: Fix p6 counter overflow check

    	Fix the counter overflow check for CPUs with counter width > 32

    	I had a similar change in a different patch that I didn't submit
    	and I didn't notice the problem earlier because it was always
    	tested together.

        Upstream commit a4a16beadea041ab601e65b264b568e8b6b4f68d
        Upstream Author: Eric Dumazet <dada1@cosmosbay.com>
        Upstream Date:   Mon Nov 10 09:05:37 2008 +0100

    	oprofile: fix an overflow in ppro code

    	reset_value was changed from long to u64 in commit
    	b99170288421c79f0c2efa8b33e26e65f4bb7fb8 (oprofile: Implement Intel
    	architectural perfmon support)

    	But the dynamic allocation of this array uses the wrong type (long
    	instead of u64)

        Upstream commit 780eef9492b16a1543a3b2ae9f9526a735fc9856
        Upstream Author: Tim Blechmann <tim@klingt.org>
        Upstream Date:   Thu Feb 19 17:34:03 2009 +0100

    	x86: oprofile: don't set counter width from cpuid on Core2

    	Impact: fix stuck NMIs and non-working oprofile on certain CPUs

    	Resetting the counter width of the performance counters on Intel's
    	Core2 CPUs, breaks the delivery of NMIs, when running in x86_64 mode.

    	This should fix bug #12395:

    	  http://bugzilla.kernel.org/show_bug.cgi?id=12395

diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index fc98c05..588800e 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -444,6 +444,16 @@ static int __init ppro_init(char **cpu_type)
 	return 1;
 }
 
+static int __init arch_perfmon_init(char **cpu_type)
+{
+	if (!cpu_has_arch_perfmon)
+		return 0;
+	*cpu_type = "i386/arch_perfmon";
+	model = &op_arch_perfmon_spec;
+	arch_perfmon_setup_counters();
+	return 1;
+}
+
 /* in order to get driverfs right */
 static int using_nmi;
 
@@ -451,7 +461,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 {
 	__u8 vendor = boot_cpu_data.x86_vendor;
 	__u8 family = boot_cpu_data.x86;
-	char *cpu_type;
+	char *cpu_type = NULL;
 	uint32_t eax, ebx, ecx, edx;
 
 	if (!cpu_has_apic)
@@ -496,19 +506,20 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 			switch (family) {
 				/* Pentium IV */
 				case 0xf:
-					if (!p4_init(&cpu_type))
-						return -ENODEV;
+					p4_init(&cpu_type);
 					break;
 
 				/* A P6-class processor */
 				case 6:
-					if (!ppro_init(&cpu_type))
-						return -ENODEV;
+					ppro_init(&cpu_type);
 					break;
 
 				default:
-					return -ENODEV;
+					break;
 			}
+
+			if (!cpu_type && !arch_perfmon_init(&cpu_type))
+				return -ENODEV;
 			break;
 
 		default:
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index 5e3d7df..42f5a33 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -1,31 +1,33 @@
 /**
  * @file op_model_ppro.h
- * pentium pro / P6 model-specific MSR operations
+ * Family 6 perfmon and architectural perfmon MSR operations
  *
  * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2008 Intel Corporation
  * @remark Read the file COPYING
  *
  * @author John Levon
  * @author Philippe Elie
  * @author Graydon Hoare
+ * @author Andi Kleen
  */
 
 #include <linux/oprofile.h>
+#include <linux/slab.h>
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
- 
+#include <asm/intel_arch_perfmon.h>
+
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 2
-#define NUM_CONTROLS 2
+static int num_counters = 2;
+static int counter_width = 32;
 
 #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_32BIT_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 
 #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
@@ -39,20 +41,20 @@
 #define CTRL_SET_UM(val, m) (val |= (m << 8))
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
-static unsigned long reset_value[NUM_COUNTERS];
- 
+static u64 *reset_value;
+
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
 	int i;
 
-	for (i=0; i < NUM_COUNTERS; i++) {
+	for (i = 0; i < num_counters; i++) {
 		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
 		else
 			msrs->counters[i].addr = 0;
 	}
-	
-	for (i=0; i < NUM_CONTROLS; i++) {
+
+	for (i = 0; i < num_counters; i++) {
 		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
 		else
@@ -66,28 +68,52 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
+	if (!reset_value) {
+		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
+					GFP_ATOMIC);
+		if (!reset_value)
+			return;
+	}
+
+	if (cpu_has_arch_perfmon) {
+		union cpuid10_eax eax;
+		eax.full = cpuid_eax(0xa);
+
+		/*
+		 * For Core2 (family 6, model 15), don't reset the
+		 * counter width:
+		 */
+		if (!(eax.split.version_id == 0 &&
+			current_cpu_data.x86 == 6 &&
+				current_cpu_data.x86_model == 15)) {
+
+			if (counter_width < eax.split.bit_width)
+				counter_width = eax.split.bit_width;
+		}
+	}
+
 	/* clear all counters */
-	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+	for (i = 0 ; i < num_counters; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
 			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
 	}
-	
+
 	/* avoid a false detection of ctr overflows in NMI handler */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
 			continue;
-		CTR_32BIT_WRITE(1, msrs, i);
+		wrmsrl(msrs->counters[i].addr, -1LL);
 	}
 
 	/* enable active counters */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 
-			CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 
 			CTRL_READ(low, high, msrs, i);
 			CTRL_CLEAR(low);
@@ -107,16 +133,16 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 static int ppro_check_ctrs(struct pt_regs * const regs,
 			   struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
- 
-	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+
+	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
+		rdmsrl(msrs->counters[i].addr, val);
+		if (CTR_OVERFLOWED(val)) {
 			oprofile_add_sample(regs, i);
-			CTR_32BIT_WRITE(reset_value[i], msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 		}
 	}
 
@@ -140,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 	unsigned int low,high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			CTRL_READ(low, high, msrs, i);
 			CTRL_SET_ACTIVE(low);
@@ -155,7 +181,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	unsigned int low,high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
 		CTRL_READ(low, high, msrs, i);
@@ -168,20 +194,66 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 {
 	int i;
 
-	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+	for (i = 0 ; i < num_counters ; ++i) {
 		if (CTR_IS_RESERVED(msrs,i))
 			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
 	}
-	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+	for (i = 0 ; i < num_counters ; ++i) {
 		if (CTRL_IS_RESERVED(msrs,i))
 			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
 	}
+	if (reset_value) {
+		kfree(reset_value);
+		reset_value = NULL;
+	}
+}
+
+struct op_x86_model_spec op_ppro_spec = {
+	.num_counters = 2,	/* can be overriden */
+	.num_controls = 2,	/* dito */
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop,
+	.shutdown = &ppro_shutdown
+};
+
+/*
+ * Architectural performance monitoring.
+ *
+ * Newer Intel CPUs (Core1+) have support for architectural
+ * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
+ * The advantage of this is that it can be done without knowing about
+ * the specific CPU.
+ */
+
+void arch_perfmon_setup_counters(void)
+{
+	union cpuid10_eax eax;
+
+	eax.full = cpuid_eax(0xa);
+
+	/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
+	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
+		current_cpu_data.x86_model == 15) {
+		eax.split.version_id = 2;
+		eax.split.num_counters = 2;
+		eax.split.bit_width = 40;
+	}
+
+	num_counters = eax.split.num_counters;
+
+	op_arch_perfmon_spec.num_counters = num_counters;
+	op_arch_perfmon_spec.num_controls = num_counters;
+	op_ppro_spec.num_counters = num_counters;
+	op_ppro_spec.num_controls = num_counters;
 }
 
-struct op_x86_model_spec const op_ppro_spec = {
-	.num_counters = NUM_COUNTERS,
-	.num_controls = NUM_CONTROLS,
+struct op_x86_model_spec op_arch_perfmon_spec = {
+	/* num_counters/num_controls filled in at runtime */
 	.fill_in_addresses = &ppro_fill_in_addresses,
+	/* user space does the cpuid check for available events */
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.start = &ppro_start,
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
index 82b1019..aa286ae 100644
--- a/arch/i386/oprofile/op_x86_model.h
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -64,8 +64,8 @@ struct pt_regs;
  * various x86 CPU model's perfctr support.
  */
 struct op_x86_model_spec {
-	unsigned int const num_counters;
-	unsigned int const num_controls;
+	unsigned int num_counters;
+	unsigned int num_controls;
 	void (*fill_in_addresses)(struct op_msrs * const msrs);
 	void (*setup_ctrs)(struct op_msrs const * const msrs);
 	int (*check_ctrs)(struct pt_regs * const regs,
@@ -75,10 +75,13 @@ struct op_x86_model_spec {
 	void (*shutdown)(struct op_msrs const * const msrs);
 };
 
-extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec op_ppro_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_athlon_spec;
+extern struct op_x86_model_spec op_arch_perfmon_spec;
+
+extern void arch_perfmon_setup_counters(void);
 
 /* setup AMD Family 10H IBS IRQ if needed */
 extern void setup_ibs_nmi(void);
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index 3a9f815..4a20215 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -154,6 +154,7 @@
 #define cpu_has_phe_enabled	boot_cpu_has(X86_FEATURE_PHE_EN)
 #define cpu_has_pmm		boot_cpu_has(X86_FEATURE_PMM)
 #define cpu_has_pmm_enabled	boot_cpu_has(X86_FEATURE_PMM_EN)
+#define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_xmm4_1		boot_cpu_has(X86_FEATURE_XMM4_1)
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index ced73f0..a998857 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -137,6 +137,7 @@
 #define cpu_has_cyrix_arr      0
 #define cpu_has_centaur_mcr    0
 #define cpu_has_clflush	       boot_cpu_has(X86_FEATURE_CLFLSH)
+#define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_xmm4_1		boot_cpu_has(X86_FEATURE_XMM4_1)
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)