2007-12-23 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/cacheinfo.c (intel_02_known): New entry 0x3f. * sysdeps/unix/sysv/linux/i386/sysconf.c (intel_02_known): Likewise. 2007-10-09 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Work around problem with some Pentium Ds. 2007-09-21 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/cacheinfo.c (__x86_64_data_cache_size_half): Renamed from __x86_64_core_cache_size_half. (init_cacheinfo): Compute shared cache size for AMD processors with shared L3 correctly. * sysdeps/x86_64/memcpy.S: Adjust for __x86_64_data_cache_size_half name change. Patch in large parts by Evandro Menezes. 2007-08-25 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/cacheinfo.c (handle_amd): Fix computation of associativity for fully-associative caches. * sysdeps/x86_64/cacheinfo.c (handle_amd): Handle L3 cache requests. Fill on more associativity values for L2. Patch mostly by Evandro Menezes. 2007-07-09 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/cacheinfo.c (intel_02_known): Add new entries. * sysdeps/unix/sysv/linux/i386/sysconf.c (intel_02_known): Likewise. --- libc/sysdeps/x86_64/cacheinfo.c.jj 2008-01-08 21:51:33.000000000 +0100 +++ libc/sysdeps/x86_64/cacheinfo.c 2008-01-08 21:52:07.000000000 +0100 @@ -48,6 +48,7 @@ static const struct intel_02_cache_info { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 }, { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 }, { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, + { 0x3f, _SC_LEVEL2_CACHE_SIZE, 262144, 2, 64 }, { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 }, { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 }, { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 }, @@ -55,11 +56,13 @@ static const struct intel_02_cache_info { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 }, { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 }, { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 }, + { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 }, { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 }, { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 }, { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 }, { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 }, { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 }, + { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 }, { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 }, { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 }, { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 }, @@ -257,7 +260,8 @@ handle_amd (int name) : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0x80000000)); - if (name >= _SC_LEVEL3_CACHE_SIZE) + /* No level 4 cache (yet). */ + if (name > _SC_LEVEL3_CACHE_LINESIZE) return 0; unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); @@ -278,36 +282,87 @@ handle_amd (int name) { case _SC_LEVEL1_DCACHE_SIZE: return (ecx >> 14) & 0x3fc00; + case _SC_LEVEL1_DCACHE_ASSOC: ecx >>= 16; if ((ecx & 0xff) == 0xff) /* Fully associative. */ return (ecx << 2) & 0x3fc00; return ecx & 0xff; + case _SC_LEVEL1_DCACHE_LINESIZE: return ecx & 0xff; + case _SC_LEVEL2_CACHE_SIZE: return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; + case _SC_LEVEL2_CACHE_ASSOC: - ecx >>= 12; - switch (ecx & 0xf) + switch ((ecx >> 12) & 0xf) { case 0: case 1: case 2: case 4: - return ecx & 0xf; + return (ecx >> 12) & 0xf; case 6: return 8; case 8: return 16; - case 0xf: - return (ecx << 6) & 0x3fffc00; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); default: return 0; } + /* NOTREACHED */ + case _SC_LEVEL2_CACHE_LINESIZE: return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; + + case _SC_LEVEL3_CACHE_SIZE: + return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1; + + case _SC_LEVEL3_CACHE_ASSOC: + switch ((edx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (edx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); + default: + return 0; + } + /* NOTREACHED */ + + case _SC_LEVEL3_CACHE_LINESIZE: + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; + default: assert (! "cannot happen"); } @@ -344,13 +399,13 @@ __cache_sysconf (int name) } -/* Half the core cache size for use in memory and string routines, typically - L1 size. */ -long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2; +/* Half the data cache size for use in memory and string routines, typically + L1 size. */ +long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2; /* Shared cache size for use in memory and string routines, typically - L2 or L3 size. */ + L2 or L3 size. */ long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; -/* PREFETCHW support flag for use in memory and string routines. */ +/* PREFETCHW support flag for use in memory and string routines. */ int __x86_64_prefetchw attribute_hidden; @@ -365,7 +420,7 @@ init_cacheinfo (void) unsigned int edx; int max_cpuid; int max_cpuid_ex; - long int core = -1; + long int data = -1; long int shared = -1; unsigned int level; unsigned int threads = 0; @@ -377,31 +432,42 @@ init_cacheinfo (void) /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) { - core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); + data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); - /* Try L3 first. */ + /* Try L3 first. */ level = 3; shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid); if (shared <= 0) { - /* Try L2 otherwise. */ + /* Try L2 otherwise. */ level = 2; shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } + asm volatile ("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "0" (1)); + /* Figure out the number of logical threads that share the - highest cache level. */ + highest cache level. */ if (max_cpuid >= 4) { int i = 0; - /* Query until desired cache level is enumerated. */ + /* Query until desired cache level is enumerated. */ do { asm volatile ("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (4), "2" (i++)); + + /* There seems to be a bug in at least some Pentium Ds + which sometimes fail to iterate all cache parameters. + Do not loop indefinitely here, stop in this case and + assume there is no such information. */ + if ((eax & 0x1f) == 0) + goto intel_bug_no_cache_info; } while (((eax >> 5) & 0x7) != level); @@ -409,42 +475,80 @@ init_cacheinfo (void) } else { - /* Assume that all logical threads share the highest cache level. */ - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (1)); + intel_bug_no_cache_info: + /* Assume that all logical threads share the highest cache level. */ threads = (ebx >> 16) & 0xff; } /* Cap usage of highest cache level to the number of supported - threads. */ + threads. */ if (shared > 0 && threads > 0) shared /= threads; } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) { - core = handle_amd (_SC_LEVEL1_DCACHE_SIZE); - shared = handle_amd (_SC_LEVEL2_CACHE_SIZE); + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + /* Get maximum extended function. */ asm volatile ("cpuid" : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0x80000000)); + if (shared <= 0) + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + else + { + /* Figure out the number of logical threads that share L3. */ + if (max_cpuid_ex >= 0x80000008) + { + /* Get width of APIC ID. */ + asm volatile ("cpuid" + : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), + "=d" (edx) + : "0" (0x80000008)); + threads = 1 << ((ecx >> 12) & 0x0f); + } + + if (threads == 0) + { + /* If APIC ID width is not available, use logical + processor count. */ + asm volatile ("cpuid" + : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), + "=d" (edx) + : "0" (0x00000001)); + + if ((edx & (1 << 28)) != 0) + threads = (ebx >> 16) & 0xff; + } + + /* Cap usage of highest cache level to the number of + supported threads. */ + if (threads > 0) + shared /= threads; + + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } + if (max_cpuid_ex >= 0x80000001) { asm volatile ("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0x80000001)); - /* PREFETCHW || 3DNow! */ + /* PREFETCHW || 3DNow! */ if ((ecx & 0x100) || (edx & 0x80000000)) __x86_64_prefetchw = -1; } } - if (core > 0) - __x86_64_core_cache_size_half = core / 2; + if (data > 0) + __x86_64_data_cache_size_half = data / 2; if (shared > 0) __x86_64_shared_cache_size_half = shared / 2; --- libc/sysdeps/x86_64/memcpy.S.jj 2008-01-08 21:25:40.000000000 +0100 +++ libc/sysdeps/x86_64/memcpy.S 2008-01-08 21:41:32.000000000 +0100 @@ -249,7 +249,7 @@ L(32after): L(fasttry): /* first 1/2 L1 */ #ifndef NOT_IN_libc /* only up to this algorithm outside of libc.so */ - movq __x86_64_core_cache_size_half (%rip), %r11 + movq __x86_64_data_cache_size_half (%rip), %r11 cmpq %rdx, %r11 /* calculate the smaller of */ cmovaq %rdx, %r11 /* remaining bytes and 1/2 L1 */ #endif --- libc/sysdeps/unix/sysv/linux/i386/sysconf.c.jj 2008-01-08 18:46:27.000000000 +0100 +++ libc/sysdeps/unix/sysv/linux/i386/sysconf.c 2008-01-08 21:59:21.000000000 +0100 @@ -90,6 +90,7 @@ static const struct intel_02_cache_info { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 }, { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 }, { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, + { 0x3f, _SC_LEVEL2_CACHE_SIZE, 262144, 2, 64 }, { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 }, { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 }, { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 }, @@ -97,11 +98,13 @@ static const struct intel_02_cache_info { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 }, { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 }, { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 }, + { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 }, { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 }, { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 }, { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 }, { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 }, { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 }, + { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 }, { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 }, { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 }, { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },