From: Bhavana Nagendra <bnagendr@redhat.com> Date: Mon, 17 Dec 2007 15:49:14 -0500 Subject: [xen] support for architectural pstate driver Message-id: 20071217204917.32238.91246.sendpatchset@localhost.localdomain O-Subject: [RHEL5.2 PATCH 1/2] Linux support for architectural pstate driver Bugzilla: 419171 Resolves BZ 419171 With the third generation Opteron parts, AMD switched to an architecturally defined interface for PowerNow! that uses different MSRs than previous versions. This patch brings the PowerNow! driver up to match the mainline Linux driver and provide support for all AMD parts that use or will use the architectural pstate interface. The latest series of Turion X2 processors have a new XFAM model. This patch set also includes a fix to the latest Patch info: This patch set (one for Linux and another for Xen) applies cleanly on -61 and goes on top of Rik's merge of Power Now! in Xen patch set. Testing: Barcelona 2p/8c system - both bare metal and Xen tested with ondemand governor wattage change noted with a power meter. Also usual userspace governor tests done. Upstream links: http://xenbits.xensource.com/xen-unstable.hg?rev/ddc9e6b2babb http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/a37a8c474d8b http://xenbits.xensource.com/staging/linux-2.6.18-xen.hg?rev/922fc4040264 http://bugzilla.kernel.org/show_bug.cgi?id=9561 -- This patch submitted to cpufreq mailing list and LKML. Please review and provide ACKs. Acked-by: Rik van Riel <riel@redhat.com> Acked-by: Prarit Bhargava <prarit@redhat.com> diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 9b7578b..76275a8 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -47,7 +47,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 2.10.00" +#define VERSION "version 2.20.00" #include "powernow-k8.h" /* serialize freq changes */ @@ -76,33 +76,10 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) { - if (current_cpu_data.x86 == 0x10) - return 100 * (fid + 0x10) >> did; - else - return 100 * (fid + 0x8) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; -} - -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; -} + return data[pstate].frequency; + } /* Return the vco fid for an input fid * @@ -145,9 +122,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) if (cpu_family == CPU_HW_PSTATE) { rdmsr(MSR_PSTATE_STATUS, lo, hi); i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + data->currpstate = i; return 0; } do { @@ -320,7 +295,7 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, static int transition_pstate(struct powernow_k8_data *data, u32 pstate) { wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); + data->currpstate = pstate; return 0; } @@ -912,41 +887,22 @@ err_out: static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; + u32 hi = 0, lo = 0; + rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); + data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; for (i = 0; i < data->acpi_data->state_count; i++) { u32 index; - u32 hi = 0, lo = 0; - u32 fid; - u32 did; index = data->acpi_data->states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { + if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - } - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } + powernow_table[i].index = index; + powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } } return 0; } @@ -1112,22 +1068,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i /* Take a frequency, and issue the hardware pstate transition command */ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) { - u32 fid = 0; - u32 did = 0; u32 pstate = 0; int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - /* get fid did for hardware pstate transition */ + /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) + if (pstate > data->max_hw_pstate) return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1135,9 +1087,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i } res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1182,10 +1132,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currvid); - else { + if (cpu_family != CPU_HW_PSTATE) { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1215,7 +1162,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1338,7 +1285,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) + (3 * (1 << data->irt) * 10)) * 1000; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1355,8 +1302,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); + dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1424,7 +1370,7 @@ static unsigned int powernowk8_get (unsigned int cpu) goto out; if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else khz = find_khz_freq_from_fid(data->currfid); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 281e52d..7528192 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -10,6 +10,7 @@ struct powernow_k8_data { u32 numps; /* number of p-states */ u32 batps; /* number of p-states supported on battery */ + u32 max_hw_pstate; /* maximum legal hardware pstate */ /* these values are constant when the PSB is used to determine * vid/fid pairings, but are modified during the ->target() call @@ -21,8 +22,8 @@ struct powernow_k8_data { u32 plllock; /* pll lock time, units 1 us */ u32 exttype; /* extended interface = 1 */ - /* keep track of the current fid / vid or did */ - u32 currvid, currfid, currdid; + /* keep track of the current fid / vid or pstate */ + u32 currvid, currfid, currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. @@ -47,7 +48,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_G 0x00060000 +#define CPUID_XMOD_REV_G 0x000c0000 #define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 @@ -88,23 +89,14 @@ struct powernow_k8_data { /* Hardware Pstate _PSS and MSR definitions */ #define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_FID_MASK 0x0000003f -#define HW_PSTATE_DID_MASK 0x000001c0 -#define HW_PSTATE_DID_SHIFT 6 -#define HW_PSTATE_MASK 0x00000007 -#define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_FID_INDEX_SHIFT 8 -#define HW_FID_INDEX_MASK 0x0000ff00 -#define HW_DID_INDEX_SHIFT 16 -#define HW_DID_INDEX_MASK 0x00ff0000 -#define HW_WATTS_MASK 0xff -#define HW_PWR_DVR_MASK 0x300 -#define HW_PWR_DVR_SHIFT 8 -#define HW_PWR_MAX_MULT 3 -#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define HW_PSTATE_MASK 0x00000007 +#define HW_PSTATE_VALID_MASK 0x80000000 +#define HW_PSTATE_MAX_MASK 0x000000f0 +#define HW_PSTATE_MAX_SHIFT 4 #define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ #define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ #define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ +#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ /* define the two driver architectures */ #define CPU_OPTERON 0