Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 2824

kernel-2.6.18-128.1.10.el5.src.rpm

From: Tetsu Yamamoto <tyamamot@redhat.com>
Date: Thu, 20 Dec 2007 18:04:54 -0500
Subject: [xen] ia64: guest has bad network performance
Message-id: 20071220174356.9E52.TYAMAMOT@redhat.com
O-Subject: [RHEL5.2 PATCH][Xen] Windows Guest/IA64 have bad network performance.
Bugzilla: 272201

The attached patches fix BZ#272201.
https://bugzilla.redhat.com/show_bug.cgi?id=272201

These are backported from upstream to optimize the ptc.e emulation
and the vtlb size.

- cs15560: [IA64] Speedup ptc.e emulation
  http://xenbits.xensource.com/ext/ia64/xen-unstable.hg?rev/834ac63f4894
- cs15696: [IA64] Make MMU setting of domVTi configurable
  http://xenbits.xensource.com/ext/ia64/xen-unstable.hg?rev/5b19839d0365
- cs15725: [IA64] Shrink vtlb size
  http://xenbits.xensource.com/ext/ia64/xen-unstable.hg?rev/f317c27973f5

I've tested these patches with kernel-xen-2.6.18-58.el5, and confirmed
with ttcp that network performance has been improved as follows:

Network throughput measured by executing 'ttcpia64 -u -t -l65000
-n75000 xx.xx.xx.xx':
  Before : 39709.25 KB/sec
  After  : 70960.53 KB/sec

Please review and ACK.

Regards,

Tetsu Yamamoto

# HG changeset patch
# User Alex Williamson <alex.williamson@hp.com>
# Date 1186942753 21600
# Node ID 5b19839d036508fb2721a567798359dd11f68916
# Parent  54c721bb6d452d8eb97a151c847c9276868ae5c5
[IA64] Make MMU setting of domVTi configurable

This patch makes the MMU settings of domVTi configurable.
The sizes of the VTLB and VHPT can be set by boot options
(e.g. "vti_vtlb_size=256k vti_vhpt_size=1m").

Also some cleanups.

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>

diff --git a/arch/ia64/vmx/vmmu.c b/arch/ia64/vmx/vmmu.c
index 41a0356..79e22f5 100644
--- a/arch/ia64/vmx/vmmu.c
+++ b/arch/ia64/vmx/vmmu.c
@@ -19,23 +19,48 @@
  *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
  *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
  */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/tlb.h>
-#include <asm/gcc_intrin.h>
-#include <asm/vcpu.h>
-#include <linux/interrupt.h>
 #include <asm/vmx_vcpu.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/vmx.h>
-#include <asm/hw_irq.h>
 #include <asm/vmx_pal_vsa.h>
-#include <asm/kregs.h>
-#include <asm/vcpu.h>
-#include <xen/irq.h>
-#include <xen/errno.h>
 #include <xen/sched-if.h>
 
+static int default_vtlb_sz = DEFAULT_VTLB_SZ;
+static int default_vhpt_sz = DEFAULT_VHPT_SZ;
+
+static void __init parse_vtlb_size(char *s)
+{
+    int sz = parse_size_and_unit(s, NULL);
+
+    if (sz > 0) {
+        default_vtlb_sz = fls(sz - 1);
+        /* minimum 16KB (for tag uniqueness) */
+        if (default_vtlb_sz < 14)
+            default_vtlb_sz = 14;
+    }
+}
+
+static int canonicalize_vhpt_size(int sz)
+{
+    /* minimum 32KB */
+    if (sz < 15)
+        return 15;
+    /* maximum 8MB (since purging TR is hard coded) */
+    if (sz > IA64_GRANULE_SHIFT - 1)
+        return IA64_GRANULE_SHIFT - 1;
+    return sz;
+}
+
+static void __init parse_vhpt_size(char *s)
+{
+    int sz = parse_size_and_unit(s, NULL);
+    if (sz > 0) {
+        default_vhpt_sz = fls(sz - 1);
+        default_vhpt_sz = canonicalize_vhpt_size(default_vhpt_sz);
+    }
+}
+
+custom_param("vti_vtlb_size", parse_vtlb_size);
+custom_param("vti_vhpt_size", parse_vhpt_size);
+
 /*
  * Get the machine page frame number in 16KB unit
  * Input:
@@ -132,66 +157,33 @@ purge_machine_tc_by_domid(domid_t domid)
 
 static int init_domain_vhpt(struct vcpu *v)
 {
-    struct page_info *page;
-    void * vbase;
-    page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0);
-    if ( page == NULL ) {
-        printk("No enough contiguous memory for init_domain_vhpt\n");
-        return -ENOMEM;
-    }
-    vbase = page_to_virt(page);
-    memset(vbase, 0, VCPU_VHPT_SIZE);
-    printk(XENLOG_DEBUG "Allocate domain vhpt at 0x%p\n", vbase);
-    
-    VHPT(v,hash) = vbase;
-    VHPT(v,hash_sz) = VCPU_VHPT_SIZE/2;
-    VHPT(v,cch_buf) = (void *)((u64)vbase + VHPT(v,hash_sz));
-    VHPT(v,cch_sz) = VCPU_VHPT_SIZE - VHPT(v,hash_sz);
-    thash_init(&(v->arch.vhpt),VCPU_VHPT_SHIFT-1);
-    v->arch.arch_vmx.mpta = v->arch.vhpt.pta.val;
+    int rc;
 
-    return 0;
+    rc = thash_alloc(&(v->arch.vhpt), default_vhpt_sz, "vhpt");
+    v->arch.arch_vmx.mpta = v->arch.vhpt.pta.val;
+    return rc;
 }
 
 
 static void free_domain_vhpt(struct vcpu *v)
 {
-    struct page_info *page;
-
-    if (v->arch.vhpt.hash) {
-        page = virt_to_page(v->arch.vhpt.hash);
-        free_domheap_pages(page, VCPU_VHPT_ORDER);
-        v->arch.vhpt.hash = 0;
-    }
-
-    return;
+    if (v->arch.vhpt.hash)
+        thash_free(&(v->arch.vhpt));
 }
 
 int init_domain_tlb(struct vcpu *v)
 {
-    struct page_info *page;
-    void * vbase;
     int rc;
 
     rc = init_domain_vhpt(v);
     if (rc)
         return rc;
 
-    page = alloc_domheap_pages (NULL, VCPU_VTLB_ORDER, 0);
-    if ( page == NULL ) {
-        printk("No enough contiguous memory for init_domain_tlb\n");
+    rc = thash_alloc(&(v->arch.vtlb), default_vtlb_sz, "vtlb");
+    if (rc) {
         free_domain_vhpt(v);
-        return -ENOMEM;
+        return rc;
     }
-    vbase = page_to_virt(page);
-    memset(vbase, 0, VCPU_VTLB_SIZE);
-    printk(XENLOG_DEBUG "Allocate domain vtlb at 0x%p\n", vbase);
-    
-    VTLB(v,hash) = vbase;
-    VTLB(v,hash_sz) = VCPU_VTLB_SIZE/2;
-    VTLB(v,cch_buf) = (void *)((u64)vbase + VTLB(v,hash_sz));
-    VTLB(v,cch_sz) = VCPU_VTLB_SIZE - VTLB(v,hash_sz);
-    thash_init(&(v->arch.vtlb),VCPU_VTLB_SHIFT-1);
     
     return 0;
 }
@@ -199,12 +191,8 @@ int init_domain_tlb(struct vcpu *v)
 
 void free_domain_tlb(struct vcpu *v)
 {
-    struct page_info *page;
-
-    if ( v->arch.vtlb.hash) {
-        page = virt_to_page(v->arch.vtlb.hash);
-        free_domheap_pages(page, VCPU_VTLB_ORDER);
-    }
+    if (v->arch.vtlb.hash)
+        thash_free(&(v->arch.vtlb));
 
     free_domain_vhpt(v);
 }
@@ -252,41 +240,9 @@ void machine_tlb_insert(struct vcpu *v, thash_data_t *tlb)
  */
 void machine_tlb_purge(u64 va, u64 ps)
 {
-//    u64       psr;
-//    psr = ia64_clear_ic();
     ia64_ptcl(va, ps << 2);
-//    ia64_set_psr(psr);
-//    ia64_srlz_i();
-//    return;
-}
-/*
-u64 machine_thash(u64 va)
-{
-    return ia64_thash(va);
-}
-
-u64 machine_ttag(u64 va)
-{
-    return ia64_ttag(va);
-}
-*/
-thash_data_t * vsa_thash(PTA vpta, u64 va, u64 vrr, u64 *tag)
-{
-    u64 index,pfn,rid,pfn_bits;
-    pfn_bits = vpta.size-5-8;
-    pfn = REGION_OFFSET(va)>>_REGION_PAGE_SIZE(vrr);
-    rid = _REGION_ID(vrr);
-    index = ((rid&0xff)<<pfn_bits)|(pfn&((1UL<<pfn_bits)-1));
-    *tag = ((rid>>8)&0xffff) | ((pfn >>pfn_bits)<<16);
-    return (thash_data_t *)((vpta.base<<PTA_BASE_SHIFT)+(index<<5));
-//    return ia64_call_vsa(PAL_VPS_THASH,va,vrr,vpta,0,0,0,0);
 }
 
-//u64 vsa_ttag(u64 va, u64 vrr)
-//{
-//    return ia64_call_vsa(PAL_VPS_TTAG,va,vrr,0,0,0,0,0);
-//}
-
 int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
 {
     ia64_rr  vrr;
diff --git a/arch/ia64/vmx/vmx_entry.S b/arch/ia64/vmx/vmx_entry.S
index ef400c4..afe9762 100644
--- a/arch/ia64/vmx/vmx_entry.S
+++ b/arch/ia64/vmx/vmx_entry.S
@@ -20,21 +20,9 @@
  *  Kun Tian (Kevin Tian) (kevin.tian@intel.com)
  */
 
-#ifndef VCPU_TLB_SHIFT
-#define VCPU_TLB_SHIFT	22
-#endif
 #include <linux/config.h>
 #include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/kregs.h>
 #include <asm/offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/vhpt.h>
-#include <asm/vmmu.h>
 #include "vmx_minstate.h"
 
 GLOBAL_ENTRY(ia64_leave_nested)
@@ -719,7 +707,7 @@ GLOBAL_ENTRY(vmx_switch_rr7)
    movl r25=PAGE_KERNEL
    ;;
    or loc5 = r25,loc5          // construct PA | page properties
-   mov r23 = VCPU_VHPT_SHIFT <<2
+   mov r23 = IA64_GRANULE_SHIFT <<2
    ;;
    ptr.d   in3,r23
    ;;
diff --git a/arch/ia64/vmx/vtlb.c b/arch/ia64/vmx/vtlb.c
index 3bd1a66..e60b81d 100644
--- a/arch/ia64/vmx/vtlb.c
+++ b/arch/ia64/vmx/vtlb.c
@@ -21,34 +21,14 @@
  *  XiaoYan Feng (Fleming Feng) (Fleming.feng@intel.com)
  */
 
-#include <linux/sched.h>
-#include <asm/tlb.h>
-#include <xen/mm.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/gcc_intrin.h>
-#include <linux/interrupt.h>
 #include <asm/vmx_vcpu.h>
-#include <asm/vmx_phy_mode.h>
-#include <asm/vmmu.h>
-#include <asm/tlbflush.h>
-#include <asm/regionreg.h>
-#define  MAX_CCH_LENGTH     40
 
 thash_data_t *__alloc_chain(thash_cb_t *);
 
-static void cch_mem_init(thash_cb_t *hcb)
+static inline void cch_mem_init(thash_cb_t *hcb)
 {
-    int num;
-    thash_data_t *p;
-
-    hcb->cch_freelist = p = hcb->cch_buf;
-    num = (hcb->cch_sz/sizeof(thash_data_t))-1;
-    do{
-        p->next =p+1;
-        p++;
-        num--;
-    }while(num);
-    p->next = NULL;
+    hcb->cch_free_idx = 0;
+    hcb->cch_freelist = NULL;
 }
 
 static thash_data_t *cch_alloc(thash_cb_t *hcb)
@@ -56,8 +36,16 @@ static thash_data_t *cch_alloc(thash_cb_t *hcb)
     thash_data_t *p;
     if ( (p = hcb->cch_freelist) != NULL ) {
         hcb->cch_freelist = p->next;
+        return p;
+    }
+    if (hcb->cch_free_idx < hcb->cch_sz/sizeof(thash_data_t)) {
+        p = &((thash_data_t *)hcb->cch_buf)[hcb->cch_free_idx++];
+        p->page_flags = 0;
+        p->itir = 0;
+        p->next = NULL;
+        return p;
     }
-    return p;
+    return NULL;
 }
 
 /*
@@ -298,6 +286,17 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
     return ret;
 }
 
+static thash_data_t * vtlb_thash(PTA vpta, u64 va, u64 vrr, u64 *tag)
+{
+    u64 index, pfn, rid;
+
+    pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+    rid = _REGION_ID(vrr);
+    index = (pfn ^ rid) & ((1UL << (vpta.size - 5)) - 1);
+    *tag = pfn ^ (rid << 39);
+    return (thash_data_t *)((vpta.base << PTA_BASE_SHIFT) + (index << 5));
+}
+
 /*
  *  purge software guest tlb
  */
@@ -320,7 +319,7 @@ static void vtlb_purge(VCPU *v, u64 va, u64 ps)
         size = PSIZE(rr_ps);
         vrr.ps = rr_ps;
         while (num) {
-            cur = vsa_thash(hcb->pta, curadr, vrr.rrval, &tag);
+            cur = vtlb_thash(hcb->pta, curadr, vrr.rrval, &tag);
             while (cur) {
                 if (cur->etag == tag && cur->ps == rr_ps)
                     cur->etag = 1UL << 63;
@@ -413,7 +412,7 @@ void vtlb_insert(VCPU *v, u64 pte, u64 itir, u64 va)
     vcpu_get_rr(v, va, &vrr.rrval);
     vrr.ps = itir_ps(itir);
     VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-    hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
+    hash_table = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
     cch = hash_table;
     while (cch) {
         if (INVALID_TLB(cch)) {
@@ -645,7 +644,7 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 va,int is_data)
         ps = __ffs(psbits);
         psbits &= ~(1UL << ps);
         vrr.ps = ps;
-        cch = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
+        cch = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
         do {
             if (cch->etag == tag && cch->ps == ps)
                 return cch;
@@ -659,16 +658,15 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 va,int is_data)
 /*
  * Initialize internal control data before service.
  */
-void thash_init(thash_cb_t *hcb, u64 sz)
+static void thash_init(thash_cb_t *hcb, u64 sz)
 {
     int num;
-    thash_data_t *head, *p;
+    thash_data_t *head;
 
     hcb->pta.val = (unsigned long)hcb->hash;
     hcb->pta.vf = 1;
     hcb->pta.ve = 1;
     hcb->pta.size = sz;
-    hcb->cch_rec_head = hcb->hash;
     
     head=hcb->hash;
     num = (hcb->hash_sz/sizeof(thash_data_t));
@@ -680,16 +678,47 @@ void thash_init(thash_cb_t *hcb, u64 sz)
         head++;
         num--;
     }while(num);
+
+    hcb->cch_free_idx = 0;
+    hcb->cch_freelist = NULL;
+}
+
+int thash_alloc(thash_cb_t *hcb, u64 sz_log2, char *what)
+{
+    struct page_info *page;
+    void * vbase;
+    u64 sz = 1UL << sz_log2;
+
+    page = alloc_domheap_pages(NULL, (sz_log2 + 1 - PAGE_SHIFT), 0);
+    if (page == NULL) {
+        printk("No enough contiguous memory(%ldKB) for init_domain_%s\n", 
+               sz >> (10 - 1), what);
+        return -ENOMEM;
+    }
+    vbase = page_to_virt(page);
+    memset(vbase, 0, sz + sz); // hash + collisions chain
+    if (sz_log2 >= 20 - 1)
+        printk(XENLOG_DEBUG "Allocate domain %s at 0x%p(%ldMB)\n", 
+               what, vbase, sz >> (20 - 1));
+    else
+        printk(XENLOG_DEBUG "Allocate domain %s at 0x%p(%ldKB)\n",
+               what, vbase, sz >> (10 - 1));
     
-    hcb->cch_freelist = p = hcb->cch_buf;
-    num = hcb->cch_sz / sizeof(thash_data_t);
-    do{
-        p->page_flags = 0;
-        p->itir = 0;
-        p->next =p+1;
-        p++;
-        num--;
-    }while(num);
+    hcb->hash = vbase;
+    hcb->hash_sz = sz;
+    hcb->cch_buf = (void *)((u64)vbase + hcb->hash_sz);
+    hcb->cch_sz = sz;
+    thash_init(hcb, sz_log2);
+    return 0;
+}
 
-    (p - 1)->next = NULL;
+void thash_free(thash_cb_t *hcb)
+{
+    struct page_info *page;
+
+    if (hcb->hash) {
+        page = virt_to_page(hcb->hash);
+        free_domheap_pages(page, hcb->pta.size + 1 - PAGE_SHIFT);
+        hcb->hash = 0;
+    }
 }
diff --git a/include/asm-ia64/vmmu.h b/include/asm-ia64/vmmu.h
index bf6889b..5710837 100644
--- a/include/asm-ia64/vmmu.h
+++ b/include/asm-ia64/vmmu.h
@@ -24,12 +24,8 @@
 #define XEN_TLBthash_H
 
 #define     MAX_CCN_DEPTH       (15)       // collision chain depth
-#define     VCPU_VTLB_SHIFT     (20)    // 1M for VTLB
-#define     VCPU_VTLB_SIZE      (1UL<<VCPU_VTLB_SHIFT)
-#define     VCPU_VTLB_ORDER     (VCPU_VTLB_SHIFT - PAGE_SHIFT)
-#define     VCPU_VHPT_SHIFT     (24)    // 16M for VTLB
-#define     VCPU_VHPT_SIZE      (1UL<<VCPU_VHPT_SHIFT)
-#define     VCPU_VHPT_ORDER     (VCPU_VHPT_SHIFT - PAGE_SHIFT)
+#define     DEFAULT_VTLB_SZ     (14) // 16K hash + 16K c-chain for VTLB
+#define     DEFAULT_VHPT_SZ     (23) // 8M hash + 8M c-chain for VHPT
 #define     VTLB(v,_x)          (v->arch.vtlb._x)
 #define     VHPT(v,_x)          (v->arch.vhpt._x)
 #ifndef __ASSEMBLY__
@@ -195,15 +191,17 @@ typedef struct thash_cb {
     u64     hash_sz;        // size of above data.
     void    *cch_buf;       // base address of collision chain.
     u64     cch_sz;         // size of above data.
+    u64     cch_free_idx;   // index of free entry.
     thash_data_t *cch_freelist;
-    thash_data_t *cch_rec_head;  // cch recycle header
     PTA     pta;
 } thash_cb_t;
 
 /*
- * Initialize internal control data before service.
+ * Allocate and initialize internal control data before service.
  */
-extern void thash_init(thash_cb_t *hcb, u64 sz);
+extern int thash_alloc(thash_cb_t *hcb, u64 sz, char *what);
+
+extern void thash_free(thash_cb_t *hcb);
 
 /*
  * Insert an entry to hash table.