Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > bee470e21a8ce2cdd844d7d805aa403d > files > 237

glibc-2.5-49.el5_5.6.src.rpm

2007-12-23  Ulrich Drepper  <drepper@redhat.com>

        * sysdeps/x86_64/cacheinfo.c (intel_02_known): New entry 0x3f.
        * sysdeps/unix/sysv/linux/i386/sysconf.c (intel_02_known): Likewise.

2007-10-09  Ulrich Drepper  <drepper@redhat.com>

        * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Work around problem
        with some Pentium Ds.

2007-09-21  Ulrich Drepper  <drepper@redhat.com>

        * sysdeps/x86_64/cacheinfo.c (__x86_64_data_cache_size_half): Renamed
        from __x86_64_core_cache_size_half.
        (init_cacheinfo): Compute shared cache size for AMD processors with
        shared L3 correctly.
        * sysdeps/x86_64/memcpy.S: Adjust for __x86_64_data_cache_size_half
        name change.
        Patch in large parts by Evandro Menezes.

2007-08-25  Ulrich Drepper  <drepper@redhat.com>

        * sysdeps/x86_64/cacheinfo.c (handle_amd): Fix computation of
        associativity for fully-associative caches.

        * sysdeps/x86_64/cacheinfo.c (handle_amd): Handle L3 cache
        requests.  Fill on more associativity values for L2.
        Patch mostly by Evandro Menezes.

2007-07-09  Ulrich Drepper  <drepper@redhat.com>

        * sysdeps/x86_64/cacheinfo.c (intel_02_known): Add new entries.
        * sysdeps/unix/sysv/linux/i386/sysconf.c (intel_02_known): Likewise.

--- libc/sysdeps/x86_64/cacheinfo.c.jj	2008-01-08 21:51:33.000000000 +0100
+++ libc/sysdeps/x86_64/cacheinfo.c	2008-01-08 21:52:07.000000000 +0100
@@ -48,6 +48,7 @@ static const struct intel_02_cache_info
     { 0x3c, _SC_LEVEL2_CACHE_SIZE,   262144,  4, 64 },
     { 0x3d, _SC_LEVEL2_CACHE_SIZE,   393216,  6, 64 },
     { 0x3e, _SC_LEVEL2_CACHE_SIZE,   524288,  4, 64 },
+    { 0x3f, _SC_LEVEL2_CACHE_SIZE,   262144,  2, 64 },
     { 0x41, _SC_LEVEL2_CACHE_SIZE,   131072,  4, 32 },
     { 0x42, _SC_LEVEL2_CACHE_SIZE,   262144,  4, 32 },
     { 0x43, _SC_LEVEL2_CACHE_SIZE,   524288,  4, 32 },
@@ -55,11 +56,13 @@ static const struct intel_02_cache_info
     { 0x45, _SC_LEVEL2_CACHE_SIZE,  2097152,  4, 32 },
     { 0x46, _SC_LEVEL3_CACHE_SIZE,  4194304,  4, 64 },
     { 0x47, _SC_LEVEL3_CACHE_SIZE,  8388608,  8, 64 },
+    { 0x48, _SC_LEVEL2_CACHE_SIZE,  3145728, 12, 64 },
     { 0x49, _SC_LEVEL2_CACHE_SIZE,  4194304, 16, 64 },
     { 0x4a, _SC_LEVEL3_CACHE_SIZE,  6291456, 12, 64 },
     { 0x4b, _SC_LEVEL3_CACHE_SIZE,  8388608, 16, 64 },
     { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
     { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+    { 0x4e, _SC_LEVEL2_CACHE_SIZE,  6291456, 24, 64 },
     { 0x60, _SC_LEVEL1_DCACHE_SIZE,   16384,  8, 64 },
     { 0x66, _SC_LEVEL1_DCACHE_SIZE,    8192,  4, 64 },
     { 0x67, _SC_LEVEL1_DCACHE_SIZE,   16384,  4, 64 },
@@ -257,7 +260,8 @@ handle_amd (int name)
 		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 		: "0" (0x80000000));
 
-  if (name >= _SC_LEVEL3_CACHE_SIZE)
+  /* No level 4 cache (yet).  */
+  if (name > _SC_LEVEL3_CACHE_LINESIZE)
     return 0;
 
   unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
@@ -278,36 +282,87 @@ handle_amd (int name)
     {
     case _SC_LEVEL1_DCACHE_SIZE:
       return (ecx >> 14) & 0x3fc00;
+
     case _SC_LEVEL1_DCACHE_ASSOC:
       ecx >>= 16;
       if ((ecx & 0xff) == 0xff)
 	/* Fully associative.  */
 	return (ecx << 2) & 0x3fc00;
       return ecx & 0xff;
+
     case _SC_LEVEL1_DCACHE_LINESIZE:
       return ecx & 0xff;
+
     case _SC_LEVEL2_CACHE_SIZE:
       return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
+
     case _SC_LEVEL2_CACHE_ASSOC:
-      ecx >>= 12;
-      switch (ecx & 0xf)
+      switch ((ecx >> 12) & 0xf)
         {
         case 0:
         case 1:
         case 2:
         case 4:
-	  return ecx & 0xf;
+	  return (ecx >> 12) & 0xf;
 	case 6:
 	  return 8;
 	case 8:
 	  return 16;
-	case 0xf:
-	  return (ecx << 6) & 0x3fffc00;
+	case 10:
+	  return 32;
+	case 11:
+	  return 48;
+	case 12:
+	  return 64;
+	case 13:
+	  return 96;
+	case 14:
+	  return 128;
+	case 15:
+	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
 	default:
 	  return 0;
         }
+      /* NOTREACHED */
+
     case _SC_LEVEL2_CACHE_LINESIZE:
       return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
+
+    case _SC_LEVEL3_CACHE_SIZE:
+      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
+
+    case _SC_LEVEL3_CACHE_ASSOC:
+      switch ((edx >> 12) & 0xf)
+	{
+	case 0:
+	case 1:
+	case 2:
+	case 4:
+	  return (edx >> 12) & 0xf;
+	case 6:
+	  return 8;
+	case 8:
+	  return 16;
+	case 10:
+	  return 32;
+	case 11:
+	  return 48;
+	case 12:
+	  return 64;
+	case 13:
+	  return 96;
+	case 14:
+	  return 128;
+	case 15:
+	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
+	default:
+	  return 0;
+	}
+      /* NOTREACHED */
+
+    case _SC_LEVEL3_CACHE_LINESIZE:
+      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
+
     default:
       assert (! "cannot happen");
     }
@@ -344,13 +399,13 @@ __cache_sysconf (int name)
 }
 
 
-/* Half the core cache size for use in memory and string routines, typically
-   L1 size. */
-long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2;
+/* Half the data cache size for use in memory and string routines, typically
+   L1 size.  */
+long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
 /* Shared cache size for use in memory and string routines, typically
-   L2 or L3 size. */
+   L2 or L3 size.  */
 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-/* PREFETCHW support flag for use in memory and string routines. */
+/* PREFETCHW support flag for use in memory and string routines.  */
 int __x86_64_prefetchw attribute_hidden;
 
 
@@ -365,7 +420,7 @@ init_cacheinfo (void)
   unsigned int edx;
   int max_cpuid;
   int max_cpuid_ex;
-  long int core = -1;
+  long int data = -1;
   long int shared = -1;
   unsigned int level;
   unsigned int threads = 0;
@@ -377,31 +432,42 @@ init_cacheinfo (void)
   /* This spells out "GenuineIntel".  */
   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
     {
-      core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
+      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
 
-      /* Try L3 first. */
+      /* Try L3 first.  */
       level  = 3;
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
 
       if (shared <= 0)
         {
-	  /* Try L2 otherwise. */
+	  /* Try L2 otherwise.  */
           level  = 2;
           shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
 	}
 
+      asm volatile ("cpuid"
+		    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+		    : "0" (1));
+
       /* Figure out the number of logical threads that share the
-	 highest cache level. */
+	 highest cache level.  */
       if (max_cpuid >= 4)
         {
 	  int i = 0;
 
-	  /* Query until desired cache level is enumerated. */
+	  /* Query until desired cache level is enumerated.  */
 	  do
 	    {
               asm volatile ("cpuid"
 		            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 		            : "0" (4), "2" (i++));
+
+	      /* There seems to be a bug in at least some Pentium Ds
+		 which sometimes fail to iterate all cache parameters.
+		 Do not loop indefinitely here, stop in this case and
+		 assume there is no such information.  */
+	      if ((eax & 0x1f) == 0)
+		goto intel_bug_no_cache_info;
 	    }
           while (((eax >> 5) & 0x7) != level);
 
@@ -409,42 +475,80 @@ init_cacheinfo (void)
 	}
       else
         {
-	  /* Assume that all logical threads share the highest cache level. */
-          asm volatile ("cpuid"
-		        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
-		        : "0" (1));
+	intel_bug_no_cache_info:
+	  /* Assume that all logical threads share the highest cache level.  */
 
 	  threads = (ebx >> 16) & 0xff;
 	}
 
       /* Cap usage of highest cache level to the number of supported
-	 threads. */
+	 threads.  */
       if (shared > 0 && threads > 0)
         shared /= threads;
     }
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
     {
-      core   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
-      shared = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+      data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
+      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
 
+      /* Get maximum extended function. */
       asm volatile ("cpuid"
 		    : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
 		    : "0" (0x80000000));
 
+      if (shared <= 0)
+	/* No shared L3 cache.  All we have is the L2 cache.  */
+	shared = core;
+      else
+	{
+	  /* Figure out the number of logical threads that share L3.  */
+	  if (max_cpuid_ex >= 0x80000008)
+	    {
+	      /* Get width of APIC ID.  */
+	      asm volatile ("cpuid"
+			    : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
+			      "=d" (edx)
+			    : "0" (0x80000008));
+	      threads = 1 << ((ecx >> 12) & 0x0f);
+	    }
+
+	  if (threads == 0)
+	    {
+	      /* If APIC ID width is not available, use logical
+		 processor count.  */
+	      asm volatile ("cpuid"
+			    : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
+			      "=d" (edx)
+			    : "0" (0x00000001));
+
+	      if ((edx & (1 << 28)) != 0)
+		threads = (ebx >> 16) & 0xff;
+	    }
+
+	  /* Cap usage of highest cache level to the number of
+	     supported threads.  */
+	  if (threads > 0)
+	    shared /= threads;
+
+	  /* Account for exclusive L2 and L3 caches.  */
+	  shared += core;
+	}
+
       if (max_cpuid_ex >= 0x80000001)
 	{
 	  asm volatile ("cpuid"
 			: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 			: "0" (0x80000001));
-	  /*  PREFETCHW     || 3DNow! */
+	  /*  PREFETCHW     || 3DNow!  */
 	  if ((ecx & 0x100) || (edx & 0x80000000))
 	    __x86_64_prefetchw = -1;
 	}
     }
 
-  if (core > 0)
-    __x86_64_core_cache_size_half = core / 2;
+  if (data > 0)
+    __x86_64_data_cache_size_half = data / 2;
 
   if (shared > 0)
     __x86_64_shared_cache_size_half = shared / 2;
--- libc/sysdeps/x86_64/memcpy.S.jj	2008-01-08 21:25:40.000000000 +0100
+++ libc/sysdeps/x86_64/memcpy.S	2008-01-08 21:41:32.000000000 +0100
@@ -249,7 +249,7 @@ L(32after):
 
 L(fasttry):				/* first 1/2 L1 */
 #ifndef NOT_IN_libc			/* only up to this algorithm outside of libc.so */
-	movq	__x86_64_core_cache_size_half (%rip), %r11
+	movq	__x86_64_data_cache_size_half (%rip), %r11
 	cmpq	%rdx, %r11		/* calculate the smaller of */
 	cmovaq	%rdx, %r11		/* remaining bytes and 1/2 L1 */
 #endif
--- libc/sysdeps/unix/sysv/linux/i386/sysconf.c.jj	2008-01-08 18:46:27.000000000 +0100
+++ libc/sysdeps/unix/sysv/linux/i386/sysconf.c	2008-01-08 21:59:21.000000000 +0100
@@ -90,6 +90,7 @@ static const struct intel_02_cache_info
     { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 },
     { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 },
     { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
+    { 0x3f, _SC_LEVEL2_CACHE_SIZE, 262144, 2, 64 },
     { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 },
     { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 },
     { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 },
@@ -97,11 +98,13 @@ static const struct intel_02_cache_info
     { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
     { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
     { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+    { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
     { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
     { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
     { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
     { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
     { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+    { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
     { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
     { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
     { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },