Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1414

kernel-2.6.18-238.el5.src.rpm

From: Dean Nelson <dnelson@redhat.com>
Date: Thu, 8 Apr 2010 18:13:14 -0400
Subject: [hwmon] add support for additional CPU models to coretemp
Message-id: <20100408181314.4071.25362.send-patch@localhost.localdomain>
Patchwork-id: 24050
O-Subject: [RHEL5.6 PATCH] hwmon: add support for additional CPU models to
	coretemp
Bugzilla: 559228
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>

Resolves RHBZ 559228.

In RHEL5.5, a modprobe of the coretemp hwmon module fails on systems with
CPU family of 6 and CPU model of 23. If run from the console one sees (note
that the displayed 'model 17' should be 'model 0x17':

  [root@intel-mccreary-02 ~]# modprobe coretemp
  coretemp: Unknown CPU model 17
		:
  FATAL: Error inserting coretemp (/lib/modules/2.6.18-194.el5/kernel/drivers/hwmon/coretemp.ko): No such device
  [root@intel-mccreary-02 ~]#

(The displayed '17' should be '0x17'.)

In backporting the necessary upstream commits to support CPU model 23, support
for CPU models 22, 26, 28 and 30 was also added.

The following upstream commits were backported:
http://git.kernel.org/linus/d2bc7b135a948f788646e3a7aff9ac5597f42f4f
http://git.kernel.org/linus/59a35bafb223bbb0553ba1a3bb9280bda668a8d8
http://git.kernel.org/linus/d58ee056cc40117afe4d3bb03006ac537d779634
http://git.kernel.org/linus/c940336b4403540c498fceb102c7142799252129
http://git.kernel.org/linus/561d9a969455cb009bb15b63e1d925dc527e7a9d
http://git.kernel.org/linus/6369a2887a1b35fde91573adc650528e3efea8e9
http://git.kernel.org/linus/ba7c1927aa69c4dfe1ecf646f03b306e49dc8e37
http://git.kernel.org/linus/118a88718886a6cb7fb2cf7fb77ef2eea30c73a1
http://git.kernel.org/linus/ae770152c801f10a91e5e86597a39b5f9ccf2d0d
http://git.kernel.org/linus/34c86c1e622ec77ba81c01969003bbc8e15156f3
http://git.kernel.org/linus/0bf41d9f414a5cf558aff234a0ff486257537574
http://git.kernel.org/linus/708a62bcd5f699756bae81491e64648fbf19e2a4
http://git.kernel.org/linus/eccfed42215bebda0acc3158c1a4ff8325dea275
http://git.kernel.org/linus/fa08acd7d16cd7ea8114f3844b0ef2505a4276a8
http://git.kernel.org/linus/1fe63ab47a617ee95f562eaa7ddbbc59981ff8c6
http://git.kernel.org/linus/4d7a5644e4adfafe76c2bd8ee168e3f3b5dae3a8

The following upstream commits were NOT backported:
http://git.kernel.org/linus/1beeffe43311f64df8dd0ab08ff6b1858c58363f
http://git.kernel.org/linus/92cb7612aee39642d109b8d935ad265e602c0563

Brew build:
https://brewweb.devel.redhat.com/taskinfo?taskID=2366045

Tested on the following systems (all with CPU family=6):
  ibm-hs21-04.lab.bos.redhat.com	CPU model=23
  dell-per710-01.lab.bos.redhat.com	CPU model=26
  dell-pet310-01.lab.bos.redhat.com	CPU model=30
Didn't have access to systems with CPU model=22 or 28.


diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
new file mode 100644
index 0000000..92267b6
--- /dev/null
+++ b/Documentation/hwmon/coretemp
@@ -0,0 +1,40 @@
+Kernel driver coretemp
+======================
+
+Supported chips:
+  * All Intel Core family
+    Prefix: 'coretemp'
+    CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm),
+                              0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm),
+                              0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield)
+    Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual
+               Volume 3A: System Programming Guide
+               http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
+
+Author: Rudolf Marek
+
+Description
+-----------
+
+This driver permits reading temperature sensor embedded inside Intel Core CPU.
+Temperature is measured in degrees Celsius and measurement resolution is
+1 degree C. Valid temperatures are from 0 to TjMax degrees C, because
+the actual value of temperature register is in fact a delta from TjMax.
+
+Temperature known as TjMax is the maximum junction temperature of processor.
+Intel defines this temperature as 85C or 100C. At this temperature, protection
+mechanism will perform actions to forcibly cool down the processor. Alarm
+may be raised, if the temperature grows enough (more than TjMax) to trigger
+the Out-Of-Spec bit. Following table summarizes the exported sysfs files:
+
+temp1_input	 - Core temperature (in millidegrees Celsius).
+temp1_max	 - All cooling devices should be turned on (on Core2).
+temp1_crit	 - Maximum junction temperature (in millidegrees Celsius).
+temp1_crit_alarm - Set when Out-of-spec bit is set, never clears.
+		   Correct CPU operation is no longer guaranteed.
+temp1_label	 - Contains string "Core X", where X is processor
+		   number.
+
+The TjMax temperature is set to 85 degrees C if undocumented model specific
+register (UMSR) 0xee has bit 30 set. If not the TjMax is 100 degrees C as
+(sometimes) documented in processor datasheet.
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 4977a9e..c032a8e 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -325,12 +325,12 @@ config SENSORS_GL520SM
 	  will be called gl520sm.
 
 config SENSORS_CORETEMP
-	tristate "Intel Core (2) Duo/Solo temperature sensor"
-	depends on X86 && EXPERIMENTAL &&!XEN
+	tristate "Intel Core/Core2/Atom temperature sensor"
+	depends on X86 && PCI && EXPERIMENTAL &&!XEN
 	help
 	  If you say yes here you get support for the temperature
-	  sensor inside your CPU. Supported all are all known variants
-	  of Intel Core family.
+	  sensor inside your CPU. Most of the family 6 CPUs
+	  are supported. Check documentation/driver for details.
 
 config SENSORS_IBMPEX
 	tristate "IBM PowerExecutive temperature/power sensors"
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 28b28e9..1647c53 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -33,12 +33,14 @@
 #include <linux/list.h>
 #include <linux/platform_device.h>
 #include <linux/cpu.h>
+#include <linux/pci.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
 
 #define DRVNAME	"coretemp"
 
-typedef enum { SHOW_TEMP, SHOW_TJMAX, SHOW_LABEL, SHOW_NAME } SHOW;
+typedef enum { SHOW_TEMP, SHOW_TJMAX, SHOW_TTARGET, SHOW_LABEL,
+		SHOW_NAME } SHOW;
 
 /*
  * Functions declaration
@@ -55,11 +57,10 @@ struct coretemp_data {
 	unsigned long last_updated;	/* in jiffies */
 	int temp;
 	int tjmax;
+	int ttarget;
 	u8 alarm;
 };
 
-static struct coretemp_data *coretemp_update_device(struct device *dev);
-
 /*
  * Sysfs stuff
  */
@@ -95,9 +96,10 @@ static ssize_t show_temp(struct device *dev,
 
 	if (attr->index == SHOW_TEMP)
 		err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN;
-	else
+	else if (attr->index == SHOW_TJMAX)
 		err = sprintf(buf, "%d\n", data->tjmax);
-
+	else
+		err = sprintf(buf, "%d\n", data->ttarget);
 	return err;
 }
 
@@ -105,6 +107,8 @@ static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
 			  SHOW_TEMP);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL,
 			  SHOW_TJMAX);
+static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL,
+			  SHOW_TTARGET);
 static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL);
 static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
 static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
@@ -149,6 +153,94 @@ static struct coretemp_data *coretemp_update_device(struct device *dev)
 	return data;
 }
 
+static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
+{
+	/* The 100C is default for both mobile and non mobile CPUs */
+
+	int tjmax = 100000;
+	int tjmax_ee = 85000;
+	int usemsr_ee = 1;
+	int err;
+	u32 eax, edx;
+	struct pci_dev *host_bridge;
+
+	/* Early chips have no MSR for TjMax */
+
+	if ((c->x86_model == 0xf) && (c->x86_mask < 4)) {
+		usemsr_ee = 0;
+	}
+
+	/* Atom CPUs */
+
+	if (c->x86_model == 0x1c) {
+		usemsr_ee = 0;
+
+		host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+
+		if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL
+		    && (host_bridge->device == 0xa000	/* NM10 based nettop */
+		    || host_bridge->device == 0xa010))	/* NM10 based netbook */
+			tjmax = 100000;
+		else
+			tjmax = 90000;
+
+		pci_dev_put(host_bridge);
+	}
+
+	if ((c->x86_model > 0xe) && (usemsr_ee)) {
+		u8 platform_id;
+
+		/* Now we can detect the mobile CPU using Intel provided table
+		   http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
+		   For Core2 cores, check MSR 0x17, bit 28 1 = Mobile CPU
+		*/
+
+		err = rdmsr_safe_on_cpu(id, 0x17, &eax, &edx);
+		if (err) {
+			dev_warn(dev,
+				 "Unable to access MSR 0x17, assuming desktop"
+				 " CPU\n");
+			usemsr_ee = 0;
+		} else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
+			/* Trust bit 28 up to Penryn, I could not find any
+			   documentation on that; if you happen to know
+			   someone at Intel please ask */
+			usemsr_ee = 0;
+		} else {
+			/* Platform ID bits 52:50 (EDX starts at bit 32) */
+			platform_id = (edx >> 18) & 0x7;
+
+			/* Mobile Penryn CPU seems to be platform ID 7 or 5
+			  (guesswork) */
+			if ((c->x86_model == 0x17) &&
+			    ((platform_id == 5) || (platform_id == 7))) {
+				/* If MSR EE bit is set, set it to 90 degrees C,
+				   otherwise 105 degrees C */
+				tjmax_ee = 90000;
+				tjmax = 105000;
+			}
+		}
+	}
+
+	if (usemsr_ee) {
+
+		err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
+		if (err) {
+			dev_warn(dev,
+				 "Unable to access MSR 0xEE, for Tjmax, left"
+				 " at default\n");
+		} else if (eax & 0x40000000) {
+			tjmax = tjmax_ee;
+		}
+	/* if we dont use msr EE it means we are desktop CPU (with exeception
+	   of Atom) */
+	} else if (tjmax == 100000) {
+		dev_warn(dev, "Using relative temperature scale!\n");
+	}
+
+	return tjmax;
+}
+
 static int __devinit coretemp_probe(struct platform_device *pdev)
 {
 	struct coretemp_data *data;
@@ -165,8 +257,6 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
 	data->id = pdev->id;
 	data->name = "coretemp";
 	mutex_init(&data->update_lock);
-	/* Tjmax default is 100 degrees C */
-	data->tjmax = 100000;
 
 	/* test if we can access the THERM_STATUS MSR */
 	err = rdmsr_safe_on_cpu(data->id, MSR_IA32_THERM_STATUS, &eax, &edx);
@@ -193,40 +283,29 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
 		}
 	}
 
-	/* Some processors have Tjmax 85 following magic should detect it
-	   Intel won't disclose the information without signed NDA, but
-	   individuals cannot sign it. Catch(ed) 22.
-	*/
+	data->tjmax = adjust_tjmax(c, data->id, &pdev->dev);
+	platform_set_drvdata(pdev, data);
 
-	if (((c->x86_model == 0xf) && (c->x86_mask > 3)) ||
-		(c->x86_model == 0xe))  {
-		err = rdmsr_safe_on_cpu(data->id, 0xee, &eax, &edx);
+	/* read the still undocumented IA32_TEMPERATURE_TARGET it exists
+	   on older CPUs but not in this register, Atoms don't have it either */
+
+	if ((c->x86_model > 0xe) && (c->x86_model != 0x1c)) {
+		err = rdmsr_safe_on_cpu(data->id, 0x1a2, &eax, &edx);
 		if (err) {
-			dev_warn(&pdev->dev,
-				 "Unable to access MSR 0xEE, Tjmax left at %d "
-				 "degrees C\n", data->tjmax/1000);
-		} else if (eax & 0x40000000) {
-			data->tjmax = 85000;
+			dev_warn(&pdev->dev, "Unable to read"
+					" IA32_TEMPERATURE_TARGET MSR\n");
+		} else {
+			data->ttarget = data->tjmax -
+					(((eax >> 8) & 0xff) * 1000);
+			err = device_create_file(&pdev->dev,
+					&sensor_dev_attr_temp1_max.dev_attr);
+			if (err)
+				goto exit_free;
 		}
 	}
 
-	/* Intel says that above should not work for desktop Core2 processors,
-	   but it seems to work. There is no other way how get the absolute
-	   readings. Warn the user about this. First check if are desktop,
-	   bit 50 of MSR_IA32_PLATFORM_ID should be 0.
-	*/
-
-	rdmsr_safe_on_cpu(data->id, MSR_IA32_PLATFORM_ID, &eax, &edx);
-
-	if ((c->x86_model == 0xf) && (!(edx & 0x00040000))) {
-		dev_warn(&pdev->dev, "Using undocumented features, absolute "
-			 "temperature might be wrong!\n");
-	}
-
-	platform_set_drvdata(pdev, data);
-
 	if ((err = sysfs_create_group(&pdev->dev.kobj, &coretemp_group)))
-		goto exit_free;
+		goto exit_dev;
 
 	data->class_dev = hwmon_device_register(&pdev->dev);
 	if (IS_ERR(data->class_dev)) {
@@ -240,6 +319,8 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
 
 exit_class:
 	sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
+exit_dev:
+	device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr);
 exit_free:
 	kfree(data);
 exit:
@@ -252,6 +333,7 @@ static int __devexit coretemp_remove(struct platform_device *pdev)
 
 	hwmon_device_unregister(data->class_dev);
 	sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
+	device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr);
 	platform_set_drvdata(pdev, NULL);
 	kfree(data);
 	return 0;
@@ -318,7 +400,7 @@ exit:
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void coretemp_device_remove(unsigned int cpu)
+static void coretemp_device_remove(unsigned int cpu)
 {
 	struct pdev_entry *p, *n;
 	mutex_lock(&pdev_list_mutex);
@@ -332,25 +414,24 @@ void coretemp_device_remove(unsigned int cpu)
 	mutex_unlock(&pdev_list_mutex);
 }
 
-static int coretemp_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit coretemp_cpu_callback(struct notifier_block *nfb,
 				 unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long) hcpu;
 
 	switch (action) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
+	case CPU_DOWN_FAILED:
 		coretemp_device_add(cpu);
 		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
 		coretemp_device_remove(cpu);
 		break;
 	}
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata coretemp_cpu_notifier = {
+static struct notifier_block coretemp_cpu_notifier = {
 	.notifier_call = coretemp_cpu_callback,
 };
 #endif				/* !CONFIG_HOTPLUG_CPU */
@@ -371,9 +452,15 @@ static int __init coretemp_init(void)
 	for_each_online_cpu(i) {
 		struct cpuinfo_x86 *c = &(cpu_data)[i];
 
-		/* check if family 6, models e, f */
+		/* check if family 6, models 0xe (Pentium M DC),
+		  0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm),
+		  0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom),
+		  0x1e (Lynnfield) */
 		if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
-		    !((c->x86_model == 0xe) || (c->x86_model == 0xf))) {
+		    !((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
+			(c->x86_model == 0x16) || (c->x86_model == 0x17) ||
+			(c->x86_model == 0x1a) || (c->x86_model == 0x1c) ||
+			(c->x86_model == 0x1e))) {
 
 			/* supported CPU not found, but report the unknown
 			   family 6 CPU */