Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 479

kernel-2.6.18-238.el5.src.rpm

From: Bhavna Sarathy <bnagendr@redhat.com>
Date: Mon, 16 Aug 2010 16:20:59 -0400
Subject: [cpufreq] add APERF/MPERF support for AMD processors
Message-id: <20100816162649.4233.9051.sendpatchset@localhost.localdomain>
Patchwork-id: 27643
O-Subject: [RHEL5.6 PATCH] Add APERF/MPERF support for AMD processors
Bugzilla: 621335
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>

Resolves BZ 621335

Effective Frequency Reporting is a mechanism by which software can query the
processor to obtain the effective, average frequency over a dynamically
determined period of time.

Starting with model 10 of Family 0x10, AMD processors support for APERF/MPERF.
Add support for identifying it and using it within cpufreq.  Move the APERF/MPERF
functions out of the acpi-cpufreq code and into their own file so they can easily
be shared.

Applies cleaning to -211 kernel.

Patch dependency note: please apply "per core frequency" patch first, before
applying this (aper/mperf) patch to avoid patching/fuzz issues, thanks.

Upstream commit:
x86, cpufreq: Add APERF/MPERF support for AMD processors

From: Mark Langsdorf <mark.langsdorf@amd.com>
(Upstream commit id: a2fed573f065e526bfd5cbf26e5491973d9e9aaa)

Tested family 0x10 model 10 systems successfully.

Please review, and ACK.

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
index c9fe427..89cbb5d 100644
--- a/arch/i386/kernel/cpu/cpufreq/Makefile
+++ b/arch/i386/kernel/cpu/cpufreq/Makefile
@@ -1,13 +1,13 @@
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
 obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
 obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
 obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
 obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
 obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
 obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
 obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
 obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
 obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
 obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 574702a..acf1812 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -44,6 +44,8 @@
 #include <asm/delay.h>
 #include <asm/uaccess.h>
 
+#include "mperf.h"
+
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
 
 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
@@ -57,7 +59,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE		(0xffff)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -242,98 +243,6 @@ static u32 get_cur_val(cpumask_t mask)
 	return cmd.val;
 }
 
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-static unsigned int get_measured_perf(unsigned int cpu)
-{
-	union {
-		struct {
-			u32 lo;
-			u32 hi;
-		} split;
-		u64 whole;
-	} aperf_cur, mperf_cur;
-
-	cpumask_t saved_mask;
-	unsigned int perf_percent;
-	unsigned int retval;
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (get_cpu() != cpu) {
-		/* We were not able to run on requested processor */
-		put_cpu();
-		return 0;
-	}
-
-	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
-	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
-
-	wrmsr(MSR_IA32_APERF, 0,0);
-	wrmsr(MSR_IA32_MPERF, 0,0);
-
-#ifdef __i386__
-	/*
-	 * We dont want to do 64 bit divide with 32 bit kernel
-	 * Get an approximate value. Return failure in case we cannot get
-	 * an approximate value.
-	 */
-	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
-		int shift_count;
-		u32 h;
-
-		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
-		shift_count = fls(h);
-
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
-	}
-
-	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
-		int shift_count = 7;
-		aperf_cur.split.lo >>= shift_count;
-		mperf_cur.split.lo >>= shift_count;
-	}
-
-	if (aperf_cur.split.lo && mperf_cur.split.lo)
-		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
-	else
-		perf_percent = 0;
-
-#else
-	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
-		int shift_count = 7;
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
-	}
-
-	if (aperf_cur.whole && mperf_cur.whole)
-		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
-	else
-		perf_percent = 0;
-
-#endif
-
-	retval = drv_data[cpu]->max_freq * perf_percent / 100;
-
-	put_cpu();
-	set_cpus_allowed(current, saved_mask);
-
-	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
-	return retval;
-}
-
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
 	struct acpi_cpufreq_data *data = drv_data[cpu];
@@ -700,7 +609,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		unsigned int ecx;
 		ecx = cpuid_ecx(6);
 		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
-			acpi_cpufreq_driver.getavg = get_measured_perf;
+			acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
 	}
 
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
diff --git a/arch/i386/kernel/cpu/cpufreq/mperf.c b/arch/i386/kernel/cpu/cpufreq/mperf.c
new file mode 100644
index 0000000..bbbebb3
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/mperf.c
@@ -0,0 +1,106 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+unsigned int cpufreq_get_measured_perf(unsigned int cpu)
+{
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		u64 whole;
+	} aperf_cur, mperf_cur;
+
+	cpumask_t saved_mask;
+	unsigned int perf_percent;
+	unsigned int retval;
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (get_cpu() != cpu) {
+		/* We were not able to run on requested processor */
+		put_cpu();
+		return 0;
+	}
+
+	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+	wrmsr(MSR_IA32_APERF, 0,0);
+	wrmsr(MSR_IA32_MPERF, 0,0);
+
+#ifdef __i386__
+	/*
+	 * We dont want to do 64 bit divide with 32 bit kernel
+	 * Get an approximate value. Return failure in case we cannot get
+	 * an approximate value.
+	 */
+	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+		int shift_count;
+		u32 h;
+
+		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+		shift_count = fls(h);
+
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+		int shift_count = 7;
+		aperf_cur.split.lo >>= shift_count;
+		mperf_cur.split.lo >>= shift_count;
+	}
+
+	if (aperf_cur.split.lo && mperf_cur.split.lo)
+		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+	else
+		perf_percent = 0;
+
+#else
+	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+		int shift_count = 7;
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (aperf_cur.whole && mperf_cur.whole)
+		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+	else
+		perf_percent = 0;
+
+#endif
+
+	retval = policy->cpuinfo.max_freq * perf_percent / 100;
+
+	put_cpu();
+	set_cpus_allowed(current, saved_mask);
+
+	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+	return retval;
+}
+
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/cpu/cpufreq/mperf.h b/arch/i386/kernel/cpu/cpufreq/mperf.h
new file mode 100644
index 0000000..c76fe9b
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/mperf.h
@@ -0,0 +1,10 @@
+/*
+ *  (c) 2010 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+unsigned int cpufreq_get_measured_perf(unsigned int);
+
+#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index c32e28d..16344b7 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -50,6 +50,7 @@
 #define BFX PFX "BIOS error: "
 #define VERSION "version 2.20.00"
 #include "powernow-k8.h"
+#include "mperf.h"
 
 /* serialize freq changes  */
 static DEFINE_MUTEX(fidvid_mutex);
@@ -70,6 +71,8 @@ static int cpu_family = CPU_OPTERON;
 /* core performance boost */
 static bool cpb_capable, cpb_enabled;
 
+static struct cpufreq_driver cpufreq_amd64_driver;
+
 #ifndef CONFIG_SMP
 static cpumask_t cpu_core_map[1];
 #endif
@@ -1256,6 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	struct init_on_cpu init_on_cpu;
+	struct cpuinfo_x86 *c = &cpu_data[pol->cpu];
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1328,6 +1332,14 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		return -EINVAL;
 	}
 
+	/* Check for APERF/MPERF support in hardware */
+	if (c->x86_vendor == X86_VENDOR_AMD && c->cpuid_level >= 6) {
+		unsigned int ecx;
+		ecx = cpuid_ecx(6);
+		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
+			cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
+	}
+
 	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
 	if (cpu_family == CPU_HW_PSTATE)
diff --git a/arch/x86_64/kernel/cpufreq/Makefile b/arch/x86_64/kernel/cpufreq/Makefile
index 753ce1d..b5f4e84 100644
--- a/arch/x86_64/kernel/cpufreq/Makefile
+++ b/arch/x86_64/kernel/cpufreq/Makefile
@@ -4,8 +4,8 @@
 
 SRCDIR := ../../../i386/kernel/cpu/cpufreq
 
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
 obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
 obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
 obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
@@ -15,3 +15,4 @@ speedstep-centrino-objs := ${SRCDIR}/speedstep-centrino.o
 acpi-cpufreq-objs := ${SRCDIR}/acpi-cpufreq.o
 p4-clockmod-objs := ${SRCDIR}/p4-clockmod.o
 speedstep-lib-objs := ${SRCDIR}/speedstep-lib.o
+mperf-objs := ${SRCDIR}/mperf.o