From: ddugger@redhat.com <ddugger@redhat.com> Date: Mon, 23 Mar 2009 10:23:15 -0600 Subject: [xen] x86: emulate accesses to PCI window regs cf8/cfc Message-id: 200903231623.n2NGNFWm022091@sobek.n0ano.com O-Subject: [RHEL5.4 PATCH 11/21 V2] Emulate accesses to PCI window registers cf8/cfc to synchronise with accesses by the hypervisor itself. All users of cf8/cfc go through new access functions which take the appropriate spinlock. Bugzilla: 484227 RH-Acked-by: Chris Lalancette <clalance@redhat.com> RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com> RH-Acked-by: Chris Lalancette <clalance@redhat.com> This is backported from xen-unstable (c/s 17441 and 17465). Dom0 accesses the PCI window registers cf8/cfc, and the hypervisor now also accesses PCI configuration space through the same cf8/cfc window registers for VT-d. The two can therefore conflict, though the possibility is very small. This patch emulates accesses to PCI window registers cf8/cfc to synchronise with accesses by the hypervisor itself, which fixes the issue. BTW, there are still many other changes to I/O port access between RHEL 5.4 and xen-unstable, but they are not necessary for VT-d, so we didn't backport them. As a result there are two differences between this patch and xen-unstable: there are no check_guest_io_breakpoint() and pv_post_outb_hook() in traps.c in RHEL 5.4. Upstream Status: Accepted (CS 17441, 17465) BZ: 484227 Signed-off-by: Gerd Hoffman <kraxel@redhat.com> Signed-off-by: Weidong Han <weidong.han@intel.com> Signed-off-by: Don Dugger <donald.d.dugger@intel.com> diff --git a/arch/x86/cpu/amd.c b/arch/x86/cpu/amd.c index 42947f0..f848afe 100644 --- a/arch/x86/cpu/amd.c +++ b/arch/x86/cpu/amd.c @@ -3,6 +3,7 @@ #include <xen/bitops.h> #include <xen/mm.h> #include <xen/smp.h> +#include <xen/pci.h> #include <asm/io.h> #include <asm/msr.h> #include <asm/processor.h> @@ -66,19 +67,6 @@ static int c1_ramping_may_cause_clock_drift(struct cpuinfo_x86 *c) return 1; } -/* PCI access functions. 
Should be safe to use 0xcf8/0xcfc port accesses here. */ -static u8 pci_read_byte(u32 bus, u32 dev, u32 fn, u32 reg) -{ - outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8); - return inb(0xcfc + (reg & 3)); -} - -static void pci_write_byte(u32 bus, u32 dev, u32 fn, u32 reg, u8 val) -{ - outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8); - outb(val, 0xcfc + (reg & 3)); -} - /* * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation * cores only. Assume BIOS has setup all Northbridges equivalently. @@ -88,18 +76,18 @@ static void disable_c1_ramping(void) u8 pmm7; int node, nr_nodes; - /* Read the number of nodes from the first Northbridge. */ - nr_nodes = ((pci_read_byte(0, 0x18, 0x0, 0x60)>>4)&0x07)+1; - for (node = 0; node < nr_nodes; node++) { - /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */ - pmm7 = pci_read_byte(0, 0x18+node, 0x3, 0x87); - /* Invalid read means we've updated every Northbridge. */ - if (pmm7 == 0xFF) - break; - pmm7 &= 0xFC; /* clear pmm7[1:0] */ - pci_write_byte(0, 0x18+node, 0x3, 0x87, pmm7); - printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node); - } + /* Read the number of nodes from the first Northbridge. */ + nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1; + for (node = 0; node < nr_nodes; node++) { + /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */ + pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87); + /* Invalid read means we've updated every Northbridge. 
*/ + if (pmm7 == 0xFF) + break; + pmm7 &= 0xFC; /* clear pmm7[1:0] */ + pci_conf_write8(0, 0x18+node, 0x3, 0x87, pmm7); + printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node); + } } static void __init init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/domain_build.c b/arch/x86/domain_build.c index 8dcf816..59141cc 100644 --- a/arch/x86/domain_build.c +++ b/arch/x86/domain_build.c @@ -985,6 +985,8 @@ int __init construct_dom0( rc |= ioports_deny_access(dom0, 0x40, 0x43); /* PIT Channel 2 / PC Speaker Control. */ rc |= ioports_deny_access(dom0, 0x61, 0x61); + /* PCI configuration space (NB. 0xcf8 has special treatment). */ + rc |= ioports_deny_access(dom0, 0xcfc, 0xcff); /* Command-line I/O ranges. */ process_dom0_ioports_disable(); diff --git a/arch/x86/traps.c b/arch/x86/traps.c index 911694a..b6bf936 100644 --- a/arch/x86/traps.c +++ b/arch/x86/traps.c @@ -1119,7 +1119,7 @@ static int read_descriptor(unsigned int sel, } /* Has the guest requested sufficient permission for this I/O access? */ -static inline int guest_io_okay( +static int guest_io_okay( unsigned int port, unsigned int bytes, struct vcpu *v, struct cpu_user_regs *regs) { @@ -1161,19 +1161,126 @@ static inline int guest_io_okay( } /* Has the administrator granted sufficient permission for this I/O access? */ -static inline int admin_io_okay( +static int admin_io_okay( unsigned int port, unsigned int bytes, struct vcpu *v, struct cpu_user_regs *regs) { + /* + * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses. + * We never permit direct access to that register. 
+ */ + if ( (port == 0xcf8) && (bytes == 4) ) + return 0; + return ioports_access_permitted(v->domain, port, port + bytes - 1); } -#define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r) -#define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r) -#define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r) -#define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r) -#define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r) -#define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r) +static uint32_t guest_io_read( + unsigned int port, unsigned int bytes, + struct vcpu *v, struct cpu_user_regs *regs) +{ + extern uint32_t pci_conf_read( + uint32_t cf8, uint8_t offset, uint8_t bytes); + + uint32_t data = 0; + unsigned int shift = 0; + + if ( admin_io_okay(port, bytes, v, regs) ) + { + switch ( bytes ) + { + case 1: return inb(port); + case 2: return inw(port); + case 4: return inl(port); + } + } + + while ( bytes != 0 ) + { + unsigned int size = 1; + uint32_t sub_data = 0xff; + + if ( (port == 0x42) || (port == 0x43) || (port == 0x61) ) + { + sub_data = pv_pit_handler(port, 0, 0); + } + else if ( (port == 0xcf8) && (bytes == 4) ) + { + size = 4; + sub_data = v->domain->arch.pci_cf8; + } + else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) ) + { + size = min(bytes, 4 - (port & 3)); + if ( size == 3 ) + size = 2; + sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size); + } + + if ( size == 4 ) + return sub_data; + + data |= (sub_data & ((1u << (size * 8)) - 1)) << shift; + shift += size * 8; + port += size; + bytes -= size; + } + + return data; +} + +static void guest_io_write( + unsigned int port, unsigned int bytes, uint32_t data, + struct vcpu *v, struct cpu_user_regs *regs) +{ + extern void pci_conf_write( + uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data); + + if ( admin_io_okay(port, bytes, v, regs) ) + { + switch ( bytes ) { + case 1: + outb((uint8_t)data, port); + break; + case 2: + 
outw((uint16_t)data, port); + break; + case 4: + outl(data, port); + break; + } + return; + } + + while ( bytes != 0 ) + { + unsigned int size = 1; + + if ( (port == 0x42) || (port == 0x43) || (port == 0x61) ) + { + pv_pit_handler(port, (uint8_t)data, 1); + } + else if ( (port == 0xcf8) && (bytes == 4) ) + { + size = 4; + v->domain->arch.pci_cf8 = data; + } + else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) ) + { + size = min(bytes, 4 - (port & 3)); + if ( size == 3 ) + size = 2; + pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data); + } + + if ( size == 4 ) + return; + + port += size; + bytes -= size; + data >>= size * 8; + } +} /* I/O emulation support. Helper routines for, and type of, the stack stub.*/ void host_to_guest_gpr_switch(struct cpu_user_regs *) @@ -1294,7 +1401,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) /* REX prefix. */ if ( rex & 8 ) /* REX.W */ - op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */ + op_bytes = 4; /* emulate only opcodes not supporting 64-bit operands */ modrm_reg = (rex & 4) << 1; /* REX.R */ /* REX.X does not need to be decoded. 
*/ modrm_rm = (rex & 1) << 3; /* REX.B */ @@ -1323,7 +1430,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) { if ( !read_descriptor(data_sel, v, regs, &data_base, &data_limit, &ar, - _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) ) + _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL| + _SEGMENT_P) ) goto fail; if ( !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) || @@ -1362,73 +1470,47 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) } #endif + port = (u16)regs->edx; + continue_io_string: switch ( opcode ) { case 0x6c: /* INSB */ op_bytes = 1; case 0x6d: /* INSW/INSL */ - if ( data_limit < op_bytes - 1 || - rd_ad(edi) > data_limit - (op_bytes - 1) || - !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + if ( (data_limit < (op_bytes - 1)) || + (rd_ad(edi) > (data_limit - (op_bytes - 1))) || + !guest_io_okay(port, op_bytes, v, regs) ) goto fail; - port = (u16)regs->edx; - switch ( op_bytes ) - { - case 1: - /* emulate PIT counter 2 */ - data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) : - ((port == 0x42 || port == 0x43 || port == 0x61) ? - pv_pit_handler(port, 0, 0) : ~0)); - break; - case 2: - data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0); - break; - case 4: - data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0); - break; - } - if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 ) + data = guest_io_read(port, op_bytes, v, regs); + if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), + &data, op_bytes)) != 0 ) { propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc, PFEC_write_access); return EXCRET_fault_fixed; } - wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes)); + wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) + ? 
-op_bytes : op_bytes)); break; case 0x6e: /* OUTSB */ op_bytes = 1; case 0x6f: /* OUTSW/OUTSL */ - if ( data_limit < op_bytes - 1 || - rd_ad(esi) > data_limit - (op_bytes - 1) || - !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + if ( (data_limit < (op_bytes - 1)) || + (rd_ad(esi) > (data_limit - (op_bytes - 1))) || + !guest_io_okay(port, op_bytes, v, regs) ) goto fail; - rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes); - if ( rc != 0 ) + if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), + op_bytes)) != 0 ) { - propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0); + propagate_page_fault(data_base + rd_ad(esi) + + op_bytes - rc, 0); return EXCRET_fault_fixed; } - port = (u16)regs->edx; - switch ( op_bytes ) - { - case 1: - if ( guest_outb_okay(port, v, regs) ) - outb((u8)data, port); - else if ( port == 0x42 || port == 0x43 || port == 0x61 ) - pv_pit_handler(port, data, 1); - break; - case 2: - if ( guest_outw_okay(port, v, regs) ) - outw((u16)data, port); - break; - case 4: - if ( guest_outl_okay(port, v, regs) ) - outl((u32)data, port); - break; - } - wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes)); + guest_io_write(port, op_bytes, data, v, regs); + wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) + ? 
-op_bytes : op_bytes)); break; } @@ -1490,31 +1572,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) exec_in: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; - switch ( op_bytes ) + if ( admin_io_okay(port, op_bytes, v, regs) ) { - case 1: - if ( guest_inb_okay(port, v, regs) ) - io_emul(regs); - else if ( port == 0x42 || port == 0x43 || port == 0x61 ) - { - regs->eax &= ~0xffUL; - regs->eax |= pv_pit_handler(port, 0, 0); - } - else - regs->eax |= (u8)~0; - break; - case 2: - if ( guest_inw_okay(port, v, regs) ) - io_emul(regs); - else - regs->eax |= (u16)~0; - break; - case 4: - if ( guest_inl_okay(port, v, regs) ) - io_emul(regs); + io_emul(regs); + } + else + { + if ( op_bytes == 4 ) + regs->eax = 0; else - regs->eax = (u32)~0; - break; + regs->eax &= ~((1u << (op_bytes * 8)) - 1); + regs->eax |= guest_io_read(port, op_bytes, v, regs); } goto done; @@ -1532,22 +1600,11 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) exec_out: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; - switch ( op_bytes ) + if ( admin_io_okay(port, op_bytes, v, regs) ) + io_emul(regs); + else { - case 1: - if ( guest_outb_okay(port, v, regs) ) - io_emul(regs); - else if ( port == 0x42 || port == 0x43 || port == 0x61 ) - pv_pit_handler(port, regs->eax, 1); - break; - case 2: - if ( guest_outw_okay(port, v, regs) ) - io_emul(regs); - break; - case 4: - if ( guest_outl_okay(port, v, regs) ) - io_emul(regs); - break; + guest_io_write(port, op_bytes, regs->eax, v, regs); } goto done; diff --git a/include/asm-x86/domain.h b/include/asm-x86/domain.h index 7a34df4..3f5f06a 100644 --- a/include/asm-x86/domain.h +++ b/include/asm-x86/domain.h @@ -219,6 +219,7 @@ struct arch_domain /* I/O-port admin-specified access capabilities. */ struct rangeset *ioport_caps; + uint32_t pci_cf8; struct hvm_domain hvm_domain;