Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 2893

kernel-2.6.18-128.1.10.el5.src.rpm

From: Kimball Murray <kmurray@redhat.com>
Date: Thu, 12 Jul 2007 10:44:52 -0400
Subject: [xen] x86_64: add stratus hooks into memory
Message-id: 20070712144107.18197.72603.sendpatchset@dhcp83-86.boston.redhat.com
O-Subject: [RHEL5.1 Patch 1/3 ] (BZ-247833)
Bugzilla: 247833

The following hypervisor patch implements a new x86_64 platform op in the
Xen hypervisor.  It is part of a set of patches that enable the management
software of a Stratus system to control the machine.  It is necessary because
the management code (running in Dom0) needs access to hardware in a way that
the hypervisor doesn't allow.  More on that in a moment.

Additionally, there are 3 kernel-side patches that go along with this
platform operation.  One, the memory tracking patch (BZ-242514), has already
been posted and reviewed.  Two others (BZ-247841, BZ-247839) are trivial and
shall be posted along with this.  I am posting to both rh-kernel and
virtualist so that reviewers may see both sides of the dom0 platform op and
be able to see how the pieces fit together.  I should also mention that this
is x86_64 only.

Patch Description:

	Stratus management code needs to do 5 things in Dom0 that the
hypervisor does not allow.

1. Trigger System Management Interrupts (SMI) through the local apic.
   Stratus installs its own SMI handler and uses it extensively to
   perform certain platform management functions in an OS-independent
   manner.  The SMIs are generated by writing to the local apic. However,
   the hypervisor prevents any domain from mapping the local apic, even
   Dom0.

2. Writing BIOS tokens into page 0.  Stratus BIOS reads a couple of key
   locations in page 0 to determine if it should take a dump of all memory
   on bootup.  But Dom0's page0 isn't necessarily the machine's page 0.

3. CPUID instruction.  The management code needs to get the "real" cpuid
   bits in order to correctly function.

4. Read/Write some MSRs.  Some of these are virtualized in Dom0, and this
   may trick the management code into doing the wrong things.

5. Getting the true local apic id.  When triggering SMIs, the management
   code has to be sure which cpu is really triggering it.  If the wrong
   cpu is specified, the SMI handler will get stuck and never return.

After talking with several people on the virtualist team about these
requirements, it was agreed that they cannot be done in Dom0, hence the need
for a special Stratus platform operation.

An int type is used in the switch statement for platform ops.
Currently there are only about 8 or so ops defined, leaving over 4 billion
available.  This patch takes the very last possible value (0xffffffff), so
it should be a couple of million years before a collision with any new
platform ops :)

If anyone attempts to use this new platform-op on a non-Stratus platform,
an error will be returned (-EPERM from do_stratus() while the platform is
unvalidated; -ENOSYS where the op is not compiled in).  On Stratus hardware,
the first call to the platform op must request a platform validation.  Only
after the Stratus platform has been validated (using DMI strings) can the
Stratus functions be unlocked.

Other than adding a platform operation and some new structs for the Stratus
functions, none of the existing hypervisor code is modified.

Testing:
	With this patch applied to the 2.6.9-18-33 hypervisor, I haven't
seen any issues on non-Stratus hardware.  On Stratus platform, the
management code does work using this interface.

Upstream:
	Not there at the moment.  Originally I had added a new hypervisor call
which I knew wouldn't fly upstream.  The virtualist team convinced me to instead
use a new platform op.  This was a good idea but was much harder to implement
because the 3.10 hypervisor wants to build compat routines for all the
platform ops.  But now that I have figured out how to deal with that, this patch
might have a better chance of upstream inclusion so I will attempt to push it
up to Xen Source.  At least that way, Stratus can "reserve" the 0xffffffff
platform command.

Please consider this for 5.1 Beta, if it's not too late, and otherwise for
a possible later snapshot.

As always, questions are welcome.

-kimball

------------------------------ snip ------------------------

Acked-by: Rik van Riel <riel@redhat.com>

diff --git a/arch/x86/platform_hypercall.c b/arch/x86/platform_hypercall.c
index 2e087c4..110fc70 100644
--- a/arch/x86/platform_hypercall.c
+++ b/arch/x86/platform_hypercall.c
@@ -247,6 +247,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
         }
         break;
 
+#if defined (CONFIG_X86_64) && !defined (COMPAT)
+    case XENPF_stratus_call:
+    {
+        extern long do_stratus(xenpf_stratus_call_t *); /* long, as defined in stratus.c */
+        ret = (ret_t)do_stratus(&op->u.stratus_call);
+        if ( copy_to_guest(u_xenpf_op, op, 1) ) /* return the filled-in op */
+            ret = -EFAULT;
+    }
+    break;
+#endif
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/arch/x86/x86_64/Makefile b/arch/x86/x86_64/Makefile
index f712ca6..346cb6c 100644
--- a/arch/x86/x86_64/Makefile
+++ b/arch/x86/x86_64/Makefile
@@ -1,4 +1,5 @@
 subdir-y += compat
+subdir-y += stratus
 
 obj-y += entry.o
 obj-y += gpr_switch.o
diff --git a/arch/x86/x86_64/stratus/Makefile b/arch/x86/x86_64/stratus/Makefile
new file mode 100644
index 0000000..68a5fcc
--- /dev/null
+++ b/arch/x86/x86_64/stratus/Makefile
@@ -0,0 +1 @@
+obj-y += stratus.o host.o
diff --git a/arch/x86/x86_64/stratus/host.c b/arch/x86/x86_64/stratus/host.c
new file mode 100644
index 0000000..8e36071
--- /dev/null
+++ b/arch/x86/x86_64/stratus/host.c
@@ -0,0 +1,107 @@
+//#include "cc_os_defines.h"
+//#include "host.h"
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <xen/spinlock.h>
+
+/* Return the 32-bit value of the register at Reg (thin readl() wrapper). */
+unsigned int OS_READ_REG_UINT32(unsigned int *Reg)
+{
+	unsigned int value = readl(Reg);
+
+	return value;
+}
+
+/*
+ * Store the 32-bit Value into the register at Reg.
+ * Thin wrapper around writel().
+ */
+void OS_WRITE_REG_UINT32(unsigned int *Reg, unsigned int Value)
+{
+	writel(Value, Reg);
+}
+
+
+// - misc apic defines (bit fields of the interrupt command words used below)
+#define DELIVERY_PENDING        0x00001000	// polled by STALL_WHILE_APIC_BUSY
+#define DESTINATION_MASK        0xFF000000
+#define DESTINATION_SHIFT       24		// shift applied to the target id in host_request_smi
+#define DELIVERY_MODE_MASK      0x00000700
+#define DELIVER_SMI             0x00000200	// part of APIC_SMI_TO_PHYS_DEST
+#define DELIVER_NMI             0x00000400
+#define DELIVER_INIT            0x00000500
+#define DELIVER_STARTUP         0x00000600
+#define PHYSICAL_DESTINATION    0x00000000	// part of APIC_SMI_TO_PHYS_DEST
+#define LOGICAL_DESTINATION     0x00000800
+#define EDGE_TRIGGERED          0x00000000	// part of APIC_SMI_TO_PHYS_DEST
+#define LEVEL_ASSERT            0x00004000
+#define INT_MASKED              0x00010000
+#define ICR_SHORTHAND_MASK      0x000C0000
+#define ICR_USE_DEST_FIELD      0x00000000	// part of APIC_SMI_TO_PHYS_DEST
+#define ICR_SELF                0x00040000
+#define ICR_ALL_INCL_SELF       0x00080000
+#define ICR_ALL_EXCL_SELF       0x000C0000
+
+// - lvalue for the 32-bit register located ByteOffset bytes past Base
+#define APIC_REG_UINT32(Base,ByteOffset) \
+        (((unsigned int *)(Base))[(ByteOffset)/sizeof(unsigned int)])
+#define LU_ID_REGISTER    0x00000020	// read by APIC_PROC_ID
+#define LU_INT_CMD_LOW    0x00000300	// command word; also holds DELIVERY_PENDING
+#define LU_INT_CMD_HIGH   0x00000310	// receives the destination id
+#define LU_INT_VECTOR_1   0x00000360
+
+// - used to poll until the apic is not busy (spins with no timeout)
+#define STALL_WHILE_APIC_BUSY(ApicBase) \
+    do { \
+        while (OS_READ_REG_UINT32( \
+                    &APIC_REG_UINT32((ApicBase),LU_INT_CMD_LOW)) & \
+               DELIVERY_PENDING) \
+            ; \
+    } while(0)
+// - value host_request_smi writes to LU_INT_CMD_LOW to request the SMI
+#define APIC_SMI_TO_PHYS_DEST  (DELIVER_SMI | PHYSICAL_DESTINATION | \
+                                ICR_USE_DEST_FIELD | EDGE_TRIGGERED)
+// - special values of host_request_smi()'s dest argument
+#define SMI_DEST_ALL   0xffffffff
+#define SMI_DEST_SELF  0xfffffffe
+
+// - get apic processor id: keep all 8 id bits (31:24), not just the low 4
+#define APIC_PROC_ID(ApicBase) \
+    ((OS_READ_REG_UINT32(&APIC_REG_UINT32((ApicBase),LU_ID_REGISTER)) \
+     & DESTINATION_MASK) >> DESTINATION_SHIFT)
+// Return the id read from the local APIC id register (a full byte, as host_request_smi expects).
+unsigned int HostGetProcId(void)
+{
+    return APIC_PROC_ID(APIC_BASE);
+}
+
+// Send an SMI through the local APIC.  dest is SMI_DEST_ALL (id 0xff),
+// SMI_DEST_SELF (this CPU's id), or an explicit id (low 8 bits are used).
+void host_request_smi(unsigned int dest)
+{
+	unsigned int target;	// unsigned, so the shift below cannot overflow an int
+	unsigned long flags;
+
+	// Interrupts stay off for the whole poll/write/poll sequence.
+	local_irq_save(flags);
+	STALL_WHILE_APIC_BUSY(APIC_BASE);
+
+	if (dest == SMI_DEST_ALL)
+		target = 0xff;
+	else if (dest == SMI_DEST_SELF)
+		target = HostGetProcId() & 0xff;
+	else
+		target = dest & 0xff;
+
+	// Destination first, then the command word.
+	OS_WRITE_REG_UINT32(&APIC_REG_UINT32(APIC_BASE,LU_INT_CMD_HIGH),
+			target << DESTINATION_SHIFT);
+	OS_WRITE_REG_UINT32(&APIC_REG_UINT32(APIC_BASE,LU_INT_CMD_LOW),
+			APIC_SMI_TO_PHYS_DEST);
+
+	STALL_WHILE_APIC_BUSY(APIC_BASE);
+
+	local_irq_restore(flags);
+}
diff --git a/arch/x86/x86_64/stratus/stratus.c b/arch/x86/x86_64/stratus/stratus.c
new file mode 100644
index 0000000..be85bb3
--- /dev/null
+++ b/arch/x86/x86_64/stratus/stratus.c
@@ -0,0 +1,211 @@
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/smp.h>
+#include <xen/sched.h>
+#include <xen/dmi.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef __XEN_COMPAT_H
+#undef __XEN_COMPAT_H
+#endif
+#include <public/platform.h>
+
+extern void host_request_smi(unsigned int dest);
+
+// CC_CR4: copy the CPU's CR4 into the request.  A request with rw set
+// (write) is refused with -ENOSYS; a read returns 0.
+static long cc_cr4(xenpf_stratus_call_t *cc_call) {
+	unsigned long value;
+
+	// Writing CR4 is not implemented.
+	if (cc_call->u.cr4.rw != 0)
+		return -ENOSYS;
+
+	asm("movq %%cr4,%0" : "=r" (value));
+	cc_call->u.cr4.cr4 = value;
+	return 0;
+}
+
+// CC_CPUID: run cpuid() for u.cpuid.op and store the four result
+// registers in the request.  Always returns 0.
+static long cc_cpuid(xenpf_stratus_call_t *cc_call) {
+	struct cpuid_struct *regs = &cc_call->u.cpuid;
+
+	cpuid(regs->op, &regs->eax, &regs->ebx, &regs->ecx, &regs->edx);
+
+	return 0;
+}
+// CC_RW_MSR: read (rw == 0) or write one MSR; -EFAULT if the access faults.
+static long cc_rw_msr(xenpf_stratus_call_t *cc_call) {
+	uint32_t lo = (uint32_t)cc_call->u.msr.val, hi = cc_call->u.msr.val >> 32;
+
+	if (cc_call->u.msr.rw)	// _safe accessors report a fault instead of crashing
+		return wrmsr_safe(cc_call->u.msr.msr, lo, hi) ? -EFAULT : 0;
+	if (rdmsr_safe(cc_call->u.msr.msr, lo, hi))
+		return -EFAULT;
+	cc_call->u.msr.val = ((unsigned long)hi << 32) | lo;
+	return 0;
+}
+// CC_LAPIC_ID: store GET_APIC_ID() of the local APIC_ID register in u.ls.id.
+static long cc_lapic_id(xenpf_stratus_call_t *cc_call) {
+	unsigned int id_reg = apic_read(APIC_ID);
+	cc_call->u.ls.id = GET_APIC_ID(id_reg);
+	return 0;
+}
+#define DUMP_VECTOR_PHYS  	(0xf00)
+#define HOST_BIOS_VECTOR_PHYS	(0xff0)
+#define HOST_BIOS_VECTOR_SIZE	(0x10)
+
+
+// CC_RW_REGION/RW_HBV: copy u.rw.size bytes between the caller's buffer
+// u.rw.data and offset u.rw.where of the host BIOS vector.  Returns 0, or
+// -EFAULT when the range leaves the vector or the copy fails.
+static long cc_rw_hbv(xenpf_stratus_call_t *cc_call) {
+	unsigned long where = cc_call->u.rw.where;
+	int size = cc_call->u.rw.size;
+	char *hbv = __va(HOST_BIOS_VECTOR_PHYS);
+
+	// Check each bound on its own: "where + size" can wrap (negative size,
+	// huge where) and slip past a single combined comparison.
+	if (size < 0 || where > HOST_BIOS_VECTOR_SIZE ||
+	    (unsigned long)size > HOST_BIOS_VECTOR_SIZE - where)
+		return -EFAULT;
+
+	if (cc_call->u.rw.rw) {	// Write
+		if (copy_from_user(hbv + where, cc_call->u.rw.data, size))
+			return -EFAULT;
+	} else {	// Read
+		if (copy_to_user(cc_call->u.rw.data, hbv + where, size))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+// CC_RW_REGION/RW_DUMPVEC: copy u.rw.size bytes (at most sizeof(int))
+// between the caller's buffer u.rw.data and the dump vector at
+// DUMP_VECTOR_PHYS.  -EINVAL for an oversized request, -EFAULT if the
+// copy fails, 0 otherwise.
+static long cc_rw_dumpvec(xenpf_stratus_call_t *cc_call) {
+	void *dumpvec = __va(DUMP_VECTOR_PHYS);
+	int nbytes = cc_call->u.rw.size;
+	unsigned long left;
+
+	// int vs. size_t comparison: a negative size converts to a huge
+	// unsigned value and is rejected here as well.
+	if (nbytes > sizeof(int))
+		return -EINVAL;
+
+	if (cc_call->u.rw.rw)	// Write
+		left = copy_from_user(dumpvec, cc_call->u.rw.data, nbytes);
+	else			// Read
+		left = copy_to_user(cc_call->u.rw.data, dumpvec, nbytes);
+
+	return left ? -EFAULT : 0;
+}
+
+// CC_RW_REGION: dispatch on u.rw.region to the host-BIOS-vector or
+// dump-vector handler; any other region yields -EINVAL.
+static long cc_rw_region(xenpf_stratus_call_t *cc_call) {
+	int region = cc_call->u.rw.region;
+
+	if (region == RW_HBV)
+		return cc_rw_hbv(cc_call);
+	if (region == RW_DUMPVEC)
+		return cc_rw_dumpvec(cc_call);
+
+	return -EINVAL;
+}
+// CC_TRIGGER_SMI: hand u.smi.dest to host_request_smi(); always returns 0.
+static long cc_smi(xenpf_stratus_call_t *cc_call) {
+	unsigned int target = cc_call->u.smi.dest;
+	host_request_smi(target);
+	return 0;
+}
+// CC_HBV_MEMSET: fill at most HOST_BIOS_VECTOR_SIZE bytes of the host BIOS vector.
+static long cc_hbv_memset(xenpf_stratus_call_t *cc_call) {
+	int size = cc_call->u.hbv_m.size;
+	if (size < 0)	// would become a huge size_t in memset()
+		return -EINVAL;
+	if (size > HOST_BIOS_VECTOR_SIZE)
+		size = HOST_BIOS_VECTOR_SIZE;
+	memset(__va(HOST_BIOS_VECTOR_PHYS), cc_call->u.hbv_m.val, size); // clamped size
+	return 0;
+}
+
+static int locked_out = 1;	// cleared by do_stratus() when check_stratus_dmi() returns 0
+// DMI match callback for stratus_platform[]: logs the detection, returns 0.
+static int found_stratus(struct dmi_system_id *d)
+{
+	printk("Stratus platform detected.\n");
+	return 0;
+}
+// Table handed to dmi_check_system(): one entry keyed on board vendor "Stratus".
+#define NO_MATCH	{ DMI_NONE, NULL}
+#define MATCH		DMI_MATCH
+
+static struct dmi_system_id stratus_platform[] = {
+	{ found_stratus, "Stratus Platform", {
+			MATCH(DMI_BOARD_VENDOR, "Stratus"),
+			NO_MATCH, NO_MATCH, NO_MATCH	// remaining match slots unused
+			} },
+	{ NULL, NULL, }	// empty final entry
+};
+
+// Scan DMI for the Stratus vendor string.  Returns 0 when
+// dmi_check_system() reports a match, 1 otherwise.
+static int check_stratus_dmi(void) {
+	int matched = dmi_check_system(stratus_platform);
+
+	return matched ? 0 : 1;
+}
+
+// Entry point for XENPF_stratus_call.  Only a privileged domain may call
+// it, and every command is refused with -EPERM until a
+// CC_VALIDATE_PLATFORM request has made check_stratus_dmi() return 0.
+// Unknown commands yield -EINVAL.  The result is both returned and
+// stored in call->ret on every path.
+long do_stratus(xenpf_stratus_call_t *call) {
+	long ret = -EINVAL;
+
+	if (!IS_PRIV(current->domain)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	// (Re)run the DMI check; 0 from check_stratus_dmi() unlocks the rest.
+	if (call->cmd == CC_VALIDATE_PLATFORM)
+		locked_out = check_stratus_dmi();
+
+	if (locked_out) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	switch (call->cmd) {
+	case CC_TRIGGER_SMI:	ret = cc_smi(call);		break;
+	case CC_HBV_MEMSET:	ret = cc_hbv_memset(call);	break;
+	case CC_RW_REGION:	ret = cc_rw_region(call);	break;
+	case CC_LAPIC_ID:	ret = cc_lapic_id(call);	break;
+	case CC_CR4:		ret = cc_cr4(call);		break;
+	case CC_CPUID:		ret = cc_cpuid(call);		break;
+	case CC_RW_MSR:		ret = cc_rw_msr(call);		break;
+	case CC_VALIDATE_PLATFORM:
+		ret = 0;	// If we made it here, we are on a Stratus box.
+		break;
+	default:
+		// ret keeps its initial -EINVAL.
+		printk("%s:line %d, unknown command %d\n", __func__,
+			__LINE__, call->cmd);
+		break;
+	}
+
+out:
+	// Keep the in-structure status in step with the return value, also on
+	// the early -EPERM exits, which previously left call->ret untouched.
+	call->ret = ret;
+
+	return ret;
+}
+
diff --git a/include/Makefile b/include/Makefile
index 0b1d382..264cc36 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -19,7 +19,8 @@ headers-y := \
     compat/version.h \
     compat/xen.h \
     compat/xencomm.h \
-    compat/xenoprof.h
+    compat/xenoprof.h \
+    compat/stratus.h
 headers-$(CONFIG_X86)     += compat/arch-x86/xen.h
 headers-$(CONFIG_X86)     += compat/arch-x86/xen-$(compat-arch-y).h
 headers-y                 += compat/arch-$(compat-arch-y).h compat/xlat.h
diff --git a/include/public/platform.h b/include/public/platform.h
index 3347666..139375c 100644
--- a/include/public/platform.h
+++ b/include/public/platform.h
@@ -28,6 +28,7 @@
 #define __XEN_PUBLIC_PLATFORM_H__
 
 #include "xen.h"
+#include "stratus.h"
 
 #define XENPF_INTERFACE_VERSION 0x03000001
 
@@ -153,6 +154,10 @@ struct xenpf_firmware_info {
 typedef struct xenpf_firmware_info xenpf_firmware_info_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
 
+#define XENPF_stratus_call	0xffffffff
+typedef struct xenpf_stratus_call xenpf_stratus_call_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_stratus_call_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -164,6 +169,7 @@ struct xen_platform_op {
         struct xenpf_microcode_update  microcode;
         struct xenpf_platform_quirk    platform_quirk;
         struct xenpf_firmware_info     firmware_info;
+	struct xenpf_stratus_call      stratus_call;
         uint8_t                        pad[128];
     } u;
 };
diff --git a/include/public/stratus.h b/include/public/stratus.h
new file mode 100644
index 0000000..a224444
--- /dev/null
+++ b/include/public/stratus.h
@@ -0,0 +1,76 @@
+#ifndef _CC_INTERFACE_H
+#define _CC_INTERFACE_H
+
+// Fill the Host BIOS vector with a byte value (see struct hbv_memset_struct)
+#define CC_HBV_MEMSET 			1	
+// Read/Write from page 0 (HBV or DUMP)
+#define CC_RW_REGION			2
+// Trigger SMI through local apic
+#define CC_TRIGGER_SMI			3
+// Return local cpu apic id
+#define CC_LAPIC_ID			4
+// Get CR4 (a set request is answered with -ENOSYS).
+#define CC_CR4				5
+// Get cpuid
+#define CC_CPUID			6
+// Read/Write MSRs
+#define CC_RW_MSR			7
+// Are we on a Stratus box?  Other commands get -EPERM until this succeeds.
+#define CC_VALIDATE_PLATFORM		8
+
+// Page 0 regions to read/write (host bios vector or dump vector signature).
+#define	RW_HBV		1
+#define	RW_DUMPVEC	2
+
+struct cr4_struct {	// CC_CR4
+	int rw;		// 0 = read, 1 = write.
+	unsigned long cr4;	// filled in with CR4 on a read
+};
+
+struct cpuid_struct {	// CC_CPUID
+	unsigned int op;	// value passed to cpuid()
+	unsigned int eax, ebx, ecx, edx;	
+};
+
+struct msr_struct {	// CC_RW_MSR
+	int rw;			// 0 = read, anything else = write
+	unsigned int msr;	// MSR index
+	unsigned long val;	// value read, or value to write
+};
+
+struct lapic_struct {	// CC_LAPIC_ID
+	int id;			// filled in with the local APIC id
+};
+
+struct rw_struct {	// CC_RW_REGION
+	int rw;			// 0 = read, 1 = write
+	int region;		// RW_HBV or RW_DUMPVEC
+	void *data;		// caller's buffer
+	unsigned long where;	// offset in region (used for RW_HBV only)
+	int size;		// number of bytes to copy
+};
+
+struct smi_struct {	// CC_TRIGGER_SMI
+	unsigned int dest;	// 0xffffffff = all, 0xfffffffe = self, else target id
+};
+
+struct hbv_memset_struct {	// CC_HBV_MEMSET
+	int val;		// fill value handed to memset()
+	int size;		// number of bytes to fill
+};
+
+struct xenpf_stratus_call {	// argument of XENPF_stratus_call
+	int cmd;		// one of the CC_* commands; selects the union member
+	int ret;		// status stored by do_stratus()
+	union {
+		struct smi_struct smi;
+		struct hbv_memset_struct hbv_m;
+		struct rw_struct rw;
+		struct lapic_struct ls;
+		struct cr4_struct cr4;
+		struct cpuid_struct cpuid;
+		struct msr_struct msr;
+	} u;
+};
+
+#endif