Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 730

kernel-2.6.18-238.el5.src.rpm

From: Bhavna Sarathy <bnagendr@redhat.com>
Date: Thu, 5 Aug 2010 18:03:08 -0400
Subject: [edac] amd64_edac: add leaner syndrome decoding algorithm
Message-id: <20100805180845.6566.50978.sendpatchset@localhost.localdomain>
Patchwork-id: 27415
O-Subject: [RHEL5.6 PATCH 1/10] amd64_edac - Add a leaner syndrome decoding
	algorithm
Bugzilla: 568576
RH-Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>

>From 1a261184f30f7572c3326cb56baa32456732af86 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Sat, 31 Jul 2010 02:36:55 +0200
Subject: [PATCH] amd64_edac: Add a leaner syndrome decoding algorithm

(Upstream commit bfc04aec7d687282b5e7adb26799d3eb50d05f01)

Instead of using the whole syndrome tables for channel decoding, use a
set of eigenvectors with which the tables can be generated to search for
the syndrome in error. The algorithm operates independently of symbol
size and can be used for both x4 and x8 syndromes.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 2490a21..4b8c60c 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -657,7 +657,7 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
        return csrow;
 }
 
-static int get_channel_from_ecc_syndrome(unsigned short syndrome);
+static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
 
 static void amd64_cpu_display_info(struct amd64_pvt *pvt)
 {
@@ -1426,7 +1426,7 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 {
        struct amd64_pvt *pvt = mci->pvt_info;
        u32 page, offset;
-       unsigned short syndrome;
+       u16 syndrome;
        int nid, csrow, chan = 0;
 
        csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
@@ -1447,7 +1447,7 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 	 * this point.
 	 */
 	if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL)
-		chan = get_channel_from_ecc_syndrome(syndrome);
+		chan = get_channel_from_ecc_syndrome(mci, syndrome);
 
 	if (chan >= 0)
 		edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
@@ -1564,142 +1564,167 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor,
 }
 
 /*
- * syndrome mapping table for ECC ChipKill devices
+ * These are tables of eigenvectors (one per line) which can be used for the
+ * construction of the syndrome tables. The modified syndrome search algorithm
+ * uses those to find the symbol in error and thus the DIMM.
  *
- * The comment in each row is the token (nibble) number that is in error.
- * The least significant nibble of the syndrome is the mask for the bits
- * that are in error (need to be toggled) for the particular nibble.
- *
- * Each row contains 16 entries.
- * The first entry (0th) is the channel number for that row of syndromes.
- * The remaining 15 entries are the syndromes for the respective Error
- * bit mask index.
- *
- * 1st index entry is 0x0001 mask, indicating that the rightmost bit is the
- * bit in error.
- * The 2nd index entry is 0x0010 that the second bit is damaged.
- * The 3rd index entry is 0x0011 indicating that the rightmost 2 bits
- * are damaged.
- * Thus so on until index 15, 0x1111, whose entry has the syndrome
- * indicating that all 4 bits are damaged.
- *
- * A search is performed on this table looking for a given syndrome.
- *
- * See the AMD documentation for ECC syndromes. This ECC table is valid
- * across all the versions of the AMD64 processors.
- *
- * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a
- * COLUMN index, then search all ROWS of that column, looking for a match
- * with the input syndrome. The ROW value will be the token number.
- *
- * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this
- * error.
+ * Algorithm courtesy of Ross LaFetra from AMD.
  */
-#define NUMBER_ECC_ROWS  36
-static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = {
-       /* Channel 0 syndromes */
-       {/*0*/  0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
-          0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df },
-       {/*1*/  0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
-          0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f },
-       {/*2*/  0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
-          0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f },
-       {/*3*/  0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
-          0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df },
-       {/*4*/  0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
-          0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f },
-       {/*5*/  0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
-          0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf },
-       {/*6*/  0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
-          0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f },
-       {/*7*/  0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
-          0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff },
-       {/*8*/  0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
-          0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f },
-       {/*9*/  0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
-          0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f },
-       {/*a*/  0, 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
-          0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f },
-       {/*b*/  0, 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
-          0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f },
-       {/*c*/  0, 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
-          0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f },
-       {/*d*/  0, 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
-          0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f },
-       {/*e*/  0, 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
-          0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f },
-       {/*f*/  0, 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
-          0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f },
-
-       /* Channel 1 syndromes */
-       {/*10*/ 1, 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
-          0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f },
-       {/*11*/ 1, 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
-          0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff },
-       {/*12*/ 1, 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
-          0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f },
-       {/*13*/ 1, 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97,
-          0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f },
-       {/*14*/ 1, 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
-          0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f },
-       {/*15*/ 1, 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
-          0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f },
-       {/*16*/ 1, 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
-          0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f },
-       {/*17*/ 1, 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
-          0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf },
-       {/*18*/ 1, 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
-          0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f },
-       {/*19*/ 1, 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
-          0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f },
-       {/*1a*/ 1, 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
-          0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf },
-       {/*1b*/ 1, 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527,
-          0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff },
-       {/*1c*/ 1, 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
-          0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef },
-       {/*1d*/ 1, 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
-          0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def },
-       {/*1e*/ 1, 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
-          0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f },
-       {/*1f*/ 1, 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
-          0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f },
-
-       /* ECC bits are also in the set of tokens and they too can go bad
-        * first 2 cover channel 0, while the second 2 cover channel 1
-        */
-       {/*20*/ 0, 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
-          0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f },
-       {/*21*/ 0, 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
-          0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f },
-       {/*22*/ 1, 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
-          0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f },
-       {/*23*/ 1, 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
-          0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df }
+static u16 x4_vectors[] = {
+	0x2f57, 0x1afe, 0x66cc, 0xdd88,
+	0x11eb, 0x3396, 0x7f4c, 0xeac8,
+	0x0001, 0x0002, 0x0004, 0x0008,
+	0x1013, 0x3032, 0x4044, 0x8088,
+	0x106b, 0x30d6, 0x70fc, 0xe0a8,
+	0x4857, 0xc4fe, 0x13cc, 0x3288,
+	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
+	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
+	0x15c1, 0x2a42, 0x89ac, 0x4758,
+	0x2b03, 0x1602, 0x4f0c, 0xca08,
+	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
+	0x8ba7, 0x465e, 0x244c, 0x1cc8,
+	0x2b87, 0x164e, 0x642c, 0xdc18,
+	0x40b9, 0x80de, 0x1094, 0x20e8,
+	0x27db, 0x1eb6, 0x9dac, 0x7b58,
+	0x11c1, 0x2242, 0x84ac, 0x4c58,
+	0x1be5, 0x2d7a, 0x5e34, 0xa718,
+	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
+	0x4c97, 0xc87e, 0x11fc, 0x33a8,
+	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
+	0x16b3, 0x3d62, 0x4f34, 0x8518,
+	0x1e2f, 0x391a, 0x5cac, 0xf858,
+	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
+	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
+	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
+	0x4397, 0xc27e, 0x17fc, 0x3ea8,
+	0x1617, 0x3d3e, 0x6464, 0xb8b8,
+	0x23ff, 0x12aa, 0xab6c, 0x56d8,
+	0x2dfb, 0x1ba6, 0x913c, 0x7328,
+	0x185d, 0x2ca6, 0x7914, 0x9e28,
+	0x171b, 0x3e36, 0x7d7c, 0xebe8,
+	0x4199, 0x82ee, 0x19f4, 0x2e58,
+	0x4807, 0xc40e, 0x130c, 0x3208,
+	0x1905, 0x2e0a, 0x5804, 0xac08,
+	0x213f, 0x132a, 0xadfc, 0x5ba8,
+	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
 };
 
-/*
- * Given the syndrome argument, scan each of the channel tables for a syndrome
- * match. Depending on which table it is found, return the channel number.
- */
-static int get_channel_from_ecc_syndrome(unsigned short syndrome)
-{
-       int row;
-       int column;
-
-       /* Determine column to scan */
-       column = syndrome & 0xF;
-
-       /* Scan all rows, looking for syndrome, or end of table */
-       for (row = 0; row < NUMBER_ECC_ROWS; row++) {
-               if (ecc_chipkill_syndromes[row][column] == syndrome)
-                       return ecc_chipkill_syndromes[row][0];
-       }
+static u16 x8_vectors[] = {
+	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
+	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
+	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
+	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
+	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
+	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
+	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
+	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
+	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
+	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
+	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
+	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
+	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
+	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
+	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
+	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
+	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
+	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
+};
 
-       debugf0("syndrome(%x) not found\n", syndrome);
-       return -1;
+static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs,
+				 int v_dim)
+{
+	unsigned int i, err_sym;
+
+	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
+		u16 s = syndrome;
+		int v_idx =  err_sym * v_dim;
+		int v_end = (err_sym + 1) * v_dim;
+
+		/* walk over all 16 bits of the syndrome */
+		for (i = 1; i < (1U << 16); i <<= 1) {
+
+			/* if bit is set in that eigenvector... */
+			if (v_idx < v_end && vectors[v_idx] & i) {
+				u16 ev_comp = vectors[v_idx++];
+
+				/* ... and bit set in the modified syndrome, */
+				if (s & i) {
+					/* remove it. */
+					s ^= ev_comp;
+					if (!s)
+						return err_sym;
+				}
+			} else if (s & i)
+				/* can't get to zero, move to next symbol */
+				break;
+		}
+	}
+	debugf0("syndrome(%x) not found\n", syndrome);
+	return -1;
+}
+
+static int map_err_sym_to_channel(int err_sym, int sym_size)
+{
+	if (sym_size == 4)
+		switch (err_sym) {
+		case 0x20:
+		case 0x21:
+			return 0;
+			break;
+		case 0x22:
+		case 0x23:
+			return 1;
+			break;
+		default:
+			return err_sym >> 4;
+			break;
+		}
+	/* x8 symbols */
+	else
+		switch (err_sym) {
+		/* imaginary bits not in a DIMM */
+		case 0x10:
+			printk(KERN_ERR "Invalid error symbol: 0x%x\n", err_sym);
+			return -1;
+			break;
+
+		case 0x11:
+			return 0;
+			break;
+		case 0x12:
+			return 1;
+			break;
+		default:
+			return err_sym >> 3;
+			break;
+		}
+	return -1;
+}
+
+static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 value = 0;
+	int err_sym = 0;
+
+	pci_read_config_dword(pvt->misc_f3_ctl, 0x180, &value);
+
+	/* F3x180[EccSymbolSize]=1, x8 symbols */
+	if (boot_cpu_data.x86 == 0x10 &&
+	    boot_cpu_data.x86_model > 7 &&
+	    value & BIT(25)) {
+		err_sym = decode_syndrome(syndrome, x8_vectors,
+					  ARRAY_SIZE(x8_vectors), 8);
+		return map_err_sym_to_channel(err_sym, 8);
+	} else {
+		err_sym = decode_syndrome(syndrome, x4_vectors,
+					  ARRAY_SIZE(x4_vectors), 4);
+		return map_err_sym_to_channel(err_sym, 4);
+	}
 }
 
+
 /*
  * Check for valid error in the NB Status High register. If so, proceed to read
  * NB Status Low, NB Address Low and NB Address High registers and store data