From: Chris Lalancette <clalance@redhat.com>
Date: Fri, 10 Apr 2009 09:47:14 +0200
Subject: [xen] x86: GDT: replace single page with one page/CPU
Message-id: 49DEF982.6060409@redhat.com
O-Subject: [RHEL5.4 PATCH 1/2]: Replace single GDT page with one GDT page per CPU
Bugzilla: 477206
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>

As mentioned in the 0/2 cover letter, this patch replaces the single shared GDT page
with one GDT page per CPU, so the GDT no longer limits the number of processors we
can (theoretically) support.  In the next patch, we will bump the supported CPU
count to 256.

This should resolve BZ 477206.
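
For reference only (this is not part of the patch), below is a minimal C sketch of
the two mechanisms the new code relies on: loading a GDT through a packed
limit/base descriptor with "lgdt", and recovering the CPU number from the new
per-CPU GDT entry, whose segment limit is set to the CPU id and is read back with
"lsll" (instead of deriving the id from the task register, as the removed code
did).  The helper names load_gdt() and read_cpu_from_gdt() are invented for
illustration and do not appear in the patch.

/* Matches the desc_ptr introduced in include/asm-x86/desc.h. */
struct desc_ptr {
    unsigned short limit;            /* size of the GDT in bytes, minus 1 */
    unsigned long  base;             /* linear address of the GDT         */
} __attribute__((__packed__));

/* Load a GDT given its linear address and byte limit. */
static inline void load_gdt(const void *gdt, unsigned short limit)
{
    struct desc_ptr d = { .limit = limit, .base = (unsigned long)gdt };
    asm volatile ( "lgdt %0" : : "m" (d) );
}

/*
 * The per-CPU descriptor (0x0000910000000000, later patched so that its
 * segment limit equals the CPU number) lets any CPU discover its own id
 * with LSL on the PER_CPU_GDT_ENTRY selector.
 */
static inline unsigned int read_cpu_from_gdt(unsigned int per_cpu_selector)
{
    unsigned int cpu;
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (per_cpu_selector) );
    return cpu;
}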

diff --git a/arch/x86/boot/x86_32.S b/arch/x86/boot/x86_32.S
index b876a7f..39255b4 100644
--- a/arch/x86/boot/x86_32.S
+++ b/arch/x86/boot/x86_32.S
@@ -78,7 +78,7 @@ idt_descr:
         .word   0
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .long   gdt_table - FIRST_RESERVED_GDT_BYTE
+        .long   boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
 
         .align PAGE_SIZE, 0
 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
@@ -86,16 +86,17 @@ gdt_descr:
 #define GUEST_DESC(d)                                                   \
         .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff,                \
               ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
-ENTRY(gdt_table)
-        .quad 0x0000000000000000     /* unused */
+ENTRY(boot_cpu_gdt_table)
+        .quad 0x0000000000000000     /* double fault TSS */
         .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
         GUEST_DESC(0x00c0ba00)       /* 0xe019 ring 1 3.xxGB code at 0x0 */
         GUEST_DESC(0x00c0b200)       /* 0xe021 ring 1 3.xxGB data at 0x0 */
         GUEST_DESC(0x00c0fa00)       /* 0xe02b ring 3 3.xxGB code at 0x0 */
         GUEST_DESC(0x00c0f200)       /* 0xe033 ring 3 3.xxGB data at 0x0 */
-        .quad 0x0000000000000000     /* unused                           */
-        .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
+        .fill (PER_CPU_GDT_ENTRY - FLAT_RING3_DS / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu) */
+        .align PAGE_SIZE,0
 
 #ifdef CONFIG_X86_PAE
         .align 32
diff --git a/arch/x86/boot/x86_64.S b/arch/x86/boot/x86_64.S
index 46c34c1..7932f8a 100644
--- a/arch/x86/boot/x86_64.S
+++ b/arch/x86/boot/x86_64.S
@@ -85,7 +85,7 @@ multiboot_ptr:
         .word   0
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
+        .quad   boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
 
         .word   0,0,0
 idt_descr:
@@ -96,7 +96,7 @@ ENTRY(stack_start)
         .quad   cpu0_stack
 
         .align PAGE_SIZE, 0
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
         .quad 0x0000000000000000     /* unused */
         .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
@@ -105,13 +105,13 @@ ENTRY(gdt_table)
         .quad 0x00cff2000000ffff     /* 0xe02b ring 3 data                */
         .quad 0x00affa000000ffff     /* 0xe033 ring 3 code, 64-bit mode   */
         .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
-        .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
-        .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
+        .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu)      */
 
         .align PAGE_SIZE, 0
 /* NB. Even rings != 0 get access to the full 4Gb, as only the            */
 /*     (compatibility) machine->physical mapping table lives there.       */
-ENTRY(compat_gdt_table)
+ENTRY(boot_cpu_compat_gdt_table)
         .quad 0x0000000000000000     /* unused */
         .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
@@ -120,5 +120,6 @@ ENTRY(compat_gdt_table)
         .quad 0x00cffa000000ffff     /* 0xe02b ring 3 code, compatibility */
         .quad 0x00cff2000000ffff     /* 0xe033 ring 3 data                */
         .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
-        .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
-        .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
+        .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu)      */
+        .align PAGE_SIZE, 0
diff --git a/arch/x86/cpu/common.c b/arch/x86/cpu/common.c
index 268bcc2..5e02243 100644
--- a/arch/x86/cpu/common.c
+++ b/arch/x86/cpu/common.c
@@ -555,7 +555,10 @@ void __devinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &init_tss[cpu];
-	char gdt_load[10];
+	struct desc_ptr gdt_desc = {
+		.base = (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY),
+		.limit = LAST_RESERVED_GDT_BYTE
+	};
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -569,9 +572,10 @@ void __devinit cpu_init(void)
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
-	*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
-	*(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(current);
-	__asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) );
+	/* Install correct page table. */
+	write_ptbase(current);
+
+	__asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_desc) );
 
 	/* No nested task. */
 	__asm__("pushf ; andw $0xbfff,(%"__OP"sp) ; popf");
@@ -599,7 +603,4 @@ void __devinit cpu_init(void)
 #define CD(register) __asm__("mov %0,%%db" #register ::"r"(0UL) );
 	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
 #undef CD
-
-	/* Install correct page table. */
-	write_ptbase(current);
 }
diff --git a/arch/x86/domain.c b/arch/x86/domain.c
index bee2dfb..84aaee4 100644
--- a/arch/x86/domain.c
+++ b/arch/x86/domain.c
@@ -274,7 +274,6 @@ static inline int may_switch_mode(struct domain *d)
 
 int switch_native(struct domain *d)
 {
-    l1_pgentry_t gdt_l1e;
     unsigned int vcpuid;
 
     if ( d == NULL )
@@ -287,12 +286,8 @@ int switch_native(struct domain *d)
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
     release_arg_xlat_area(d);
 
-    /* switch gdt */
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
         if (d->vcpu[vcpuid])
             release_compat_l4(d->vcpu[vcpuid]);
     }
@@ -302,7 +297,6 @@ int switch_native(struct domain *d)
 
 int switch_compat(struct domain *d)
 {
-    l1_pgentry_t gdt_l1e;
     unsigned int vcpuid;
 
     if ( d == NULL )
@@ -314,15 +308,11 @@ int switch_compat(struct domain *d)
 
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
 
-    /* switch gdt */
-    gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
         if ( (d->vcpu[vcpuid] != NULL) &&
              (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
             goto undo_and_fail;
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
     }
 
     d->arch.physaddr_bitsize =
@@ -333,14 +323,10 @@ int switch_compat(struct domain *d)
 
  undo_and_fail:
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
-    release_arg_xlat_area(d);
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
     while ( vcpuid-- != 0 )
     {
         if ( d->vcpu[vcpuid] != NULL )
             release_compat_l4(d->vcpu[vcpuid]);
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
     }
     return -ENOMEM;
 }
@@ -413,39 +399,28 @@ int arch_domain_create(struct domain *d)
     struct page_info *pg;
     int i;
 #endif
-    l1_pgentry_t gdt_l1e;
-    int vcpuid, pdpt_order;
+    int pdpt_order;
     int rc = -ENOMEM;
 
     INIT_LIST_HEAD(&d->arch.pdev_list);
 
     d->arch.relmem = RELMEM_not_started;
     INIT_LIST_HEAD(&d->arch.relmem_list);
+
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
     d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
     if ( d->arch.mm_perdomain_pt == NULL )
         goto fail;
     memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
 
-    /*
-     * Map Xen segments into every VCPU's GDT, irrespective of whether every
-     * VCPU will actually be used. This avoids an NMI race during context
-     * switch: if we take an interrupt after switching CR3 but before switching
-     * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
-     * try to load CS from an invalid table.
-     */
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
-    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-
 #if defined(__i386__)
 
     mapcache_init(d);
 
 #else /* __x86_64__ */
 
-    if ( (pg = alloc_domheap_page(NULL)) == NULL )
+    pg = alloc_domheap_page(NULL);
+    if (pg == NULL)
         goto fail;
     d->arch.mm_perdomain_l2 = page_to_virt(pg);
     clear_page(d->arch.mm_perdomain_l2);
@@ -454,7 +429,8 @@ int arch_domain_create(struct domain *d)
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
                           __PAGE_HYPERVISOR);
 
-    if ( (pg = alloc_domheap_page(NULL)) == NULL )
+    pg = alloc_domheap_page(NULL);
+    if ( pg == NULL )
         goto fail;
     d->arch.mm_perdomain_l3 = page_to_virt(pg);
     clear_page(d->arch.mm_perdomain_l3);
@@ -474,6 +450,7 @@ int arch_domain_create(struct domain *d)
     {
         d->arch.ioport_caps = 
             rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+        rc = -ENOMEM;
         if ( d->arch.ioport_caps == NULL )
             goto fail;
 
@@ -502,7 +479,6 @@ int arch_domain_create(struct domain *d)
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo =
             (CONFIG_PAGING_LEVELS != 4);
     }
-        
 
     return 0;
 
@@ -1257,12 +1233,19 @@ static void paravirt_ctxt_switch_to(struct vcpu *v)
     }
 }
 
+static inline int need_full_gdt(struct vcpu *v)
+{
+    return (!is_hvm_vcpu(v) && !is_idle_vcpu(v));
+}
+
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
     unsigned int          cpu = smp_processor_id();
     struct vcpu          *p = per_cpu(curr_vcpu, cpu);
     struct vcpu          *n = current;
+    struct desc_struct   *gdt;
+    struct desc_ptr       gdt_desc;
 
     ASSERT(p != n);
     ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
@@ -1288,14 +1271,35 @@ static void __context_switch(void)
         cpu_set(cpu, n->domain->domain_dirty_cpumask);
     cpu_set(cpu, n->vcpu_dirty_cpumask);
 
+    gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
+                                  per_cpu(compat_gdt_table, cpu);
+    if ( need_full_gdt(n) )
+    {
+        struct page_info *page = virt_to_page(gdt);
+        unsigned int i;
+        for ( i = 0; i < NR_RESERVED_GDT_PAGES; i++ )
+            l1e_write(n->domain->arch.mm_perdomain_pt +
+                      (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+                      FIRST_RESERVED_GDT_PAGE + i,
+                      l1e_from_page(page + i, __PAGE_HYPERVISOR));
+    }
+
+    if ( need_full_gdt(p) &&
+         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(n)) )
+    {
+        gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
+        gdt_desc.base  = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
+        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+    }
+
     write_ptbase(n);
 
-    if ( p->vcpu_id != n->vcpu_id )
+    if ( need_full_gdt(n) &&
+         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(p)) )
     {
-        char gdt_load[10];
-        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
-        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(n);
-        __asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) );
+        gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
+        gdt_desc.base  = GDT_VIRT_START(n);
+        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
     }
 
     if ( p->domain != n->domain )
@@ -1345,9 +1349,6 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
         {
             uint64_t efer = read_efer();
 
-            local_flush_tlb_one(GDT_VIRT_START(next) +
-                                FIRST_RESERVED_GDT_BYTE);
-
             if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
                 write_efer(efer ^ EFER_SCE);
         }
diff --git a/arch/x86/domain_build.c b/arch/x86/domain_build.c
index 40e6e93..f8e464d 100644
--- a/arch/x86/domain_build.c
+++ b/arch/x86/domain_build.c
@@ -335,24 +335,11 @@ int __init construct_dom0(
 #ifdef CONFIG_COMPAT
     if ( compat32 )
     {
-        l1_pgentry_t gdt_l1e;
-
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
         v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
 
         if ( nr_pages != (unsigned int)nr_pages )
             nr_pages = UINT_MAX;
-
-        /*
-         * Map compatibility Xen segments into every VCPU's GDT. See
-         * arch_domain_create() for further comments.
-         */
-        gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
-                                PAGE_HYPERVISOR);
-        for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-            d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
-                                     FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-        local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
     }
 #endif
     if ( parms.pae == PAEKERN_extended_cr3 )
diff --git a/arch/x86/hvm/vmx/vmcs.c b/arch/x86/hvm/vmx/vmcs.c
index a241d9f..8a119d4 100644
--- a/arch/x86/hvm/vmx/vmcs.c
+++ b/arch/x86/hvm/vmx/vmcs.c
@@ -334,27 +334,14 @@ struct host_execution_env {
 
 static void vmx_set_host_env(struct vcpu *v)
 {
-    unsigned int tr, cpu;
-    struct host_execution_env host_env;
-    struct Xgt_desc_struct desc;
-
-    cpu = smp_processor_id();
-    __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
-    host_env.idtr_limit = desc.size;
-    host_env.idtr_base = desc.address;
-    __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
-
-    __asm__ __volatile__ ("sgdt  (%0) \n" :: "a"(&desc) : "memory");
-    host_env.gdtr_limit = desc.size;
-    host_env.gdtr_base = desc.address;
-    __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
-
-    __asm__ __volatile__ ("str  (%0) \n" :: "a"(&tr) : "memory");
-    host_env.tr_selector = tr;
-    host_env.tr_limit = sizeof(struct tss_struct);
-    host_env.tr_base = (unsigned long) &init_tss[cpu];
-    __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
-    __vmwrite(HOST_TR_BASE, host_env.tr_base);
+    unsigned int cpu = smp_processor_id();
+
+    __vmwrite(HOST_GDTR_BASE,
+              (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY));
+    __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
+
+    __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
+    __vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
 
     /*
      * Skip end of cpu_user_regs when entering the hypervisor because the
diff --git a/arch/x86/setup.c b/arch/x86/setup.c
index 4ff4f6b..2f0204f 100644
--- a/arch/x86/setup.c
+++ b/arch/x86/setup.c
@@ -109,6 +109,12 @@ extern void early_cpu_init(void);
 extern void vesa_init(void);
 extern void vesa_mtrr_init(void);
 
+DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
+    = boot_cpu_compat_gdt_table;
+#endif
+
 struct tss_struct init_tss[NR_CPUS];
 
 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
diff --git a/arch/x86/smpboot.c b/arch/x86/smpboot.c
index 027fb05..c23c90b 100644
--- a/arch/x86/smpboot.c
+++ b/arch/x86/smpboot.c
@@ -805,10 +805,15 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
  */
 {
 	unsigned long boot_error;
+	unsigned int order;
 	int timeout;
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 	struct vcpu *v;
+	struct desc_struct *gdt;
+#ifdef __x86_64__
+        struct page_info *page;
+#endif
 
 	++cpucount;
 
@@ -828,6 +833,41 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	/* Debug build: detect stack overflow by setting up a guard page. */
 	memguard_guard_stack(stack_start.esp);
 
+	gdt = per_cpu(gdt_table, cpu);
+	if (gdt == boot_cpu_gdt_table) {
+		order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+#ifdef CONFIG_COMPAT
+		page = alloc_domheap_pages(NULL, order, 0);
+		per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
+		memcpy(gdt, boot_cpu_compat_gdt_table,
+		       NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+		gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+#endif
+		page = alloc_domheap_pages(NULL, order, 0);
+		per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
+#else
+		per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order);
+#endif
+		memcpy(gdt, boot_cpu_gdt_table,
+		       NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+		BUILD_BUG_ON(NR_CPUS > 0x10000);
+		gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+	}
+
+#ifdef __i386__
+	if (!per_cpu(doublefault_tss, cpu)) {
+		per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
+		memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE);
+	}
+#endif
+
+	if (!idt_tables[cpu]) {
+		idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+		memcpy(idt_tables[cpu], idt_table,
+		       IDT_ENTRIES*sizeof(idt_entry_t));
+	}
+
 	/*
 	 * This grunge runs the startup process for
 	 * the targeted processor.
diff --git a/arch/x86/traps.c b/arch/x86/traps.c
index 8e72af7..5de70ee 100644
--- a/arch/x86/traps.c
+++ b/arch/x86/traps.c
@@ -2192,13 +2192,13 @@ void set_task_gate(unsigned int n, unsigned int sel)
 void set_tss_desc(unsigned int n, void *addr)
 {
     _set_tssldt_desc(
-        gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+        per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)addr,
         offsetof(struct tss_struct, __cacheline_filler) - 1,
         9);
 #ifdef CONFIG_COMPAT
     _set_tssldt_desc(
-        compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+         per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)addr,
         offsetof(struct tss_struct, __cacheline_filler) - 1,
         11);
diff --git a/arch/x86/x86_32/mm.c b/arch/x86/x86_32/mm.c
index 9b3780f..ae4a340 100644
--- a/arch/x86/x86_32/mm.c
+++ b/arch/x86/x86_32/mm.c
@@ -191,7 +191,7 @@ void __init subarch_init_memory(void)
     {
         /* Guest kernel runs in ring 0, not ring 1. */
         struct desc_struct *d;
-        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
+        d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
         d[0].b &= ~_SEGMENT_DPL;
         d[1].b &= ~_SEGMENT_DPL;
     }
diff --git a/arch/x86/x86_32/supervisor_mode_kernel.S b/arch/x86/x86_32/supervisor_mode_kernel.S
index c795704..39a74ac 100644
--- a/arch/x86/x86_32/supervisor_mode_kernel.S
+++ b/arch/x86/x86_32/supervisor_mode_kernel.S
@@ -100,15 +100,10 @@ ENTRY(fixup_ring0_guest_stack)
         # %gs:%esi now points to the guest stack before the
         # interrupt/exception occured.
 
-        /*
-         * Reverse the __TSS macro, giving us the CPU number.
-         * The TSS for this cpu is at init_tss + ( cpu * 128 ).
-         */
-        str   %ecx
-        shrl  $3,%ecx                                   # Calculate GDT index for TSS.
-        subl  $(FIRST_RESERVED_GDT_ENTRY+8),%ecx        # %ecx = 2*cpu.
-        shll  $6,%ecx                                   # Each TSS entry is 0x80 bytes
-        addl  $init_tss,%ecx                            # but we have 2*cpu from above.
+        movl  $PER_CPU_GDT_ENTRY*8,%ecx
+        lsll  %ecx,%ecx
+        shll  $7,%ecx                                   # Each TSS entry is 0x80 bytes
+        addl  $init_tss,%ecx
 
         # Load Xen stack from TSS.
         movw  TSS_ss0(%ecx),%ax
diff --git a/arch/x86/x86_32/traps.c b/arch/x86/x86_32/traps.c
index ebd174d..666a94a 100644
--- a/arch/x86/x86_32/traps.c
+++ b/arch/x86/x86_32/traps.c
@@ -136,19 +136,20 @@ void show_page_walk(unsigned long addr)
     unmap_domain_page(l1t);
 }
 
-#define DOUBLEFAULT_STACK_SIZE 2048
-static struct tss_struct doublefault_tss;
-static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
-
+DEFINE_PER_CPU(struct tss_struct *, doublefault_tss);
+static unsigned char __attribute__ ((__section__ (".bss.page_aligned")))
+    boot_cpu_doublefault_space[PAGE_SIZE];
 asmlinkage void do_double_fault(void)
 {
-    struct tss_struct *tss = &doublefault_tss;
-    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+    struct tss_struct *tss;
+    unsigned int cpu;
 
     watchdog_disable();
 
     console_force_unlock();
 
+    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
+
     /* Find information saved during fault and dump it to the console. */
     tss = &init_tss[cpu];
     printk("*** DOUBLE FAULT ***\n");
@@ -234,34 +235,36 @@ unsigned long do_iret(void)
 
 void __init percpu_traps_init(void)
 {
-    struct tss_struct *tss = &doublefault_tss;
+    struct tss_struct *tss = this_cpu(doublefault_tss);
     asmlinkage int hypercall(void);
 
-    if ( smp_processor_id() != 0 )
-        return;
+    if ( !tss )
+    {
+        /* The hypercall entry vector is only accessible from ring 1. */
+        _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
 
-    /* The hypercall entry vector is only accessible from ring 1. */
-    _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
+        tss = (void *)boot_cpu_doublefault_space;
+        this_cpu(doublefault_tss) = tss;
+    }
 
     /*
      * Make a separate task for double faults. This will get us debug output if
      * we blow the kernel stack.
      */
-    memset(tss, 0, sizeof(*tss));
     tss->ds     = __HYPERVISOR_DS;
     tss->es     = __HYPERVISOR_DS;
     tss->ss     = __HYPERVISOR_DS;
-    tss->esp    = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+    tss->esp    = (unsigned long)tss + PAGE_SIZE;
     tss->__cr3  = __pa(idle_pg_table);
     tss->cs     = __HYPERVISOR_CS;
     tss->eip    = (unsigned long)do_double_fault;
     tss->eflags = 2;
     tss->bitmap = IOBMP_INVALID_OFFSET;
     _set_tssldt_desc(
-        gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+        this_cpu(gdt_table) + DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)tss, 235, 9);
 
-    set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
+    set_task_gate(TRAP_double_fault, DOUBLEFAULT_TSS_ENTRY << 3);
 }
 
 void init_int80_direct_trap(struct vcpu *v)
diff --git a/arch/x86/x86_64/traps.c b/arch/x86/x86_64/traps.c
index 7a9c2c1..d5615b3 100644
--- a/arch/x86/x86_64/traps.c
+++ b/arch/x86/x86_64/traps.c
@@ -147,15 +147,14 @@ void show_page_walk(unsigned long addr)
 asmlinkage void double_fault(void);
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
-    unsigned int cpu, tr;
-
-    asm ( "str %0" : "=r" (tr) );
-    cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
+    unsigned int cpu;
 
     watchdog_disable();
 
     console_force_unlock();
 
+    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
+
     /* Find information saved during fault and dump it to the console. */
     printk("*** DOUBLE FAULT ***\n");
     print_xen_info();
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a79627f..1797427 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -5,11 +5,7 @@
  * Xen reserves a memory page of GDT entries.
  * No guest GDT entries exist beyond the Xen reserved area.
  */
-#if MAX_PHYS_CPUS > 64
-#define NR_RESERVED_GDT_PAGES   2
-#else
 #define NR_RESERVED_GDT_PAGES   1
-#endif
 #define NR_RESERVED_GDT_BYTES   (NR_RESERVED_GDT_PAGES * PAGE_SIZE)
 #define NR_RESERVED_GDT_ENTRIES (NR_RESERVED_GDT_BYTES / 8)
 
@@ -38,11 +34,9 @@
 #define FLAT_COMPAT_USER_CS   FLAT_COMPAT_RING3_CS
 #define FLAT_COMPAT_USER_SS   FLAT_COMPAT_RING3_SS
 
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 2)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 2)
 
 #elif defined(__i386__)
 
@@ -53,19 +47,17 @@
 #define FLAT_COMPAT_USER_DS   FLAT_USER_DS
 #define FLAT_COMPAT_USER_SS   FLAT_USER_SS
 
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
+#define DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
 
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 1)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 1)
 
 #endif
 
 #ifndef __ASSEMBLY__
 
-#define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
+#define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (TSS_ENTRY<<3) )
 
 #if defined(__x86_64__)
 #define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
@@ -198,20 +190,25 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
  "rorl $16,%%eax" \
  : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
 
+DECLARE_PER_CPU(struct tss_struct *, doublefault_tss);
+
 #endif
 
-extern struct desc_struct gdt_table[];
+struct desc_ptr {
+	unsigned short limit;
+	unsigned long base;
+} __attribute__((__packed__)) ;
+
+extern struct desc_struct boot_cpu_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, gdt_table);
 #ifdef CONFIG_COMPAT
-extern struct desc_struct compat_gdt_table[];
+extern struct desc_struct boot_cpu_compat_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, compat_gdt_table);
 #else
-# define compat_gdt_table gdt_table
+# define boot_cpu_compat_gdt_table boot_cpu_gdt_table
+# define per_cpu__compat_gdt_table per_cpu__gdt_table
 #endif
 
-struct Xgt_desc_struct {
-    unsigned short size;
-    unsigned long address __attribute__((packed));
-};
-
 extern void set_intr_gate(unsigned int irq, void * addr);
 extern void set_system_gate(unsigned int n, void *addr);
 extern void set_task_gate(unsigned int n, unsigned int sel);
diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h
index b9a769f..c7f3f06 100644
--- a/include/asm-x86/ldt.h
+++ b/include/asm-x86/ldt.h
@@ -6,7 +6,6 @@
 
 static inline void load_LDT(struct vcpu *v)
 {
-    unsigned int cpu;
     struct desc_struct *desc;
     unsigned long ents;
 
@@ -16,11 +15,11 @@ static inline void load_LDT(struct vcpu *v)
     }
     else
     {
-        cpu = smp_processor_id();
-        desc = (!is_pv_32on64_vcpu(v) ? gdt_table : compat_gdt_table)
-               + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+        desc = (!is_pv_32on64_vcpu(v)
+                ? this_cpu(gdt_table) : this_cpu(compat_gdt_table))
+               + LDT_ENTRY - FIRST_RESERVED_GDT_ENTRY;
         _set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
-        __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
+        __asm__ __volatile__ ( "lldt %%ax" : : "a" (LDT_ENTRY << 3) );
     }
 }