Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1464

kernel-2.6.18-238.el5.src.rpm

From: George Beshers <gbeshers@redhat.com>
Date: Thu, 31 Jul 2008 15:33:29 -0400
Subject: [IA64] Fix Altix BTE error return status
Message-id: 20080731192734.4411.62599.sendpatchset@dhcp-100-2-194.bos.redhat.com
O-Subject: [RHEL5.3 PATCH 10/19] [IA64] Fix Altix BTE error return status
Bugzilla: 455308
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>

[patch] Fix Altix BTE error return status

BZ#455308

Upstream: http://git.kernel.org/?p=linux/kernel/git/aegl/linux-2.6.git;a=commitdiff;h=64135fa97ce016058f95345425a9ebd04ee1bd2a

The Altix shub2 BTE error detail bits are in a different location
than on shub1.  The current code does not take this into account
resulting in all shub2 BTE failures mapping to "unknown".

This patch reads the error detail bits from the proper location,
so the correct BTE failure reason is returned for both shub1
and shub2.

Signed-off-by: Russ Anderson <rja@sgi.com>

diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index c55f487..4ddfcc4 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/module.h>
@@ -63,7 +63,7 @@ static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
  * Use the block transfer engine to move kernel memory from src to dest
  * using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -227,7 +227,7 @@ retry_bteop:
 		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
 
 	if (transfer_stat & IBLS_ERROR) {
-		bte_status = transfer_stat & ~IBLS_ERROR;
+		bte_status = BTE_GET_ERROR_STATUS(transfer_stat);
 	} else {
 		bte_status = BTE_SUCCESS;
 	}
@@ -247,7 +247,7 @@ EXPORT_SYMBOL(bte_copy);
  * use the block transfer engine to move kernel
  * memory from src to dest using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(bte_copy);
  *          for IBCT0/1 in the SGI documentation.
  *
  * NOTE: If the source, dest, and len are all cache line aligned,
- * then it would be _FAR_ preferrable to use bte_copy instead.
+ * then it would be _FAR_ preferable to use bte_copy instead.
  */
 bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 {
@@ -301,7 +301,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	 * a standard bte copy.
 	 *
 	 * One nasty exception to the above rule is when the
-	 * source and destination are not symetrically
+	 * source and destination are not symmetrically
 	 * mis-aligned.  If the source offset from the first
 	 * cache line is different from the destination offset,
 	 * we make the first section be the entire transfer
@@ -338,7 +338,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 
 			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
 				/*
-				 * We have two contigous bcopy
+				 * We have two contiguous bcopy
 				 * blocks.  Merge them.
 				 */
 				headBcopyLen += footBcopyLen;
@@ -376,7 +376,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	} else {
 
 		/*
-		 * The transfer is not symetric, we will
+		 * The transfer is not symmetric, we will
 		 * allocate a buffer large enough for all the
 		 * data, bte_copy into that buffer and then
 		 * bcopy to the destination.
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index b6fcf81..4cb09f3 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
@@ -105,7 +105,7 @@ int shub1_bte_error_handler(unsigned long _nodepda)
 	}
 
 	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
-	/* Reenable both bte interfaces */
+	/* Re-enable both bte interfaces */
 	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
 	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
 	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
@@ -148,7 +148,11 @@ int shub2_bte_error_handler(unsigned long _nodepda)
 	for (i = 0; i < BTES_PER_NODE; i++) {
 		bte = &err_nodepda->bte_if[i];
 		status = BTE_LNSTAT_LOAD(bte);
-		if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+		if (status & IBLS_ERROR) {
+			bte->bh_error = BTE_SHUB2_ERROR(status);
+			continue;
+		}
+		if (!(status & IBLS_BUSY))
 			continue;
 		mod_timer(recovery_timer, jiffies + (HZ * 5));
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
@@ -243,7 +247,7 @@ bte_crb_error_handler(cnodeid_t cnode, int btenum,
 
 	/*
 	 * The caller has already figured out the error type, we save that
-	 * in the bte handle structure for the thread excercising the
+	 * in the bte handle structure for the thread exercising the
 	 * interface to consume.
 	 */
 	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h
index 5335d87..a0d214f 100644
--- a/include/asm-ia64/sn/bte.h
+++ b/include/asm-ia64/sn/bte.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -150,6 +150,35 @@ typedef enum {
 	BTEFAIL_NOTAVAIL,	/* BTE not available */
 } bte_result_t;
 
+#define BTEFAIL_SH2_RESP_SHORT	0x1	/* bit 000001 */
+#define BTEFAIL_SH2_RESP_LONG	0x2	/* bit 000010 */
+#define BTEFAIL_SH2_RESP_DSP	0x4	/* bit 000100 */
+#define BTEFAIL_SH2_RESP_ACCESS	0x8	/* bit 001000 */
+#define BTEFAIL_SH2_CRB_TO	0x10	/* bit 010000 */
+#define BTEFAIL_SH2_NACK_LIMIT	0x20	/* bit 100000 */
+#define BTEFAIL_SH2_ALL		0x3F	/* bit 111111: all six bits above */
+
+#define	BTE_ERR_BITS	0x3FUL	/* six-bit error field, right-justified */
+#define	BTE_ERR_SHIFT	36	/* shift that brings the field down to bit 0 */
+#define BTE_ERR_MASK	(BTE_ERR_BITS << BTE_ERR_SHIFT)	/* the field in place */
+
+#define BTE_ERROR_RETRY(value)	/* true unless value is the timeout code */ \
+	(is_shub2() ? ((value) != BTEFAIL_SH2_CRB_TO)			\
+		: ((value) != BTEFAIL_TOUT))
+
+/* On shub1 the BTE_ERR_MASK bits are always clear, so is_shub2() is not
+ * needed.  When any are set, the field is moved down to bit 0 and IBLS_ERROR
+ * is OR-ed in.  _status is evaluated more than once: no side effects. */
+#define BTE_SHUB2_ERROR(_status)					\
+	(((_status) & BTE_ERR_MASK)					\
+	   ? ((((_status) >> BTE_ERR_SHIFT) & BTE_ERR_BITS) | IBLS_ERROR) \
+	   : (_status))
+
+#define BTE_GET_ERROR_STATUS(_status)	/* error code with IBLS_ERROR cleared */ \
+	(BTE_SHUB2_ERROR((_status)) & ~IBLS_ERROR)
+
+#define BTE_VALID_SH2_ERROR(value)	/* true for 0x1 through 0x3F inclusive */ \
+	(((value) >= BTEFAIL_SH2_RESP_SHORT) && ((value) <= BTEFAIL_SH2_ALL))
 
 /*
  * Structure defining a bte.  An instance of this
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
index 6f807e0..f7711b3 100644
--- a/include/asm-ia64/sn/xp.h
+++ b/include/asm-ia64/sn/xp.h
@@ -86,7 +86,7 @@ xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
 	BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
 
 	ret = bte_copy(src, pdst, len, mode, notification);
-	if (ret != BTE_SUCCESS) {
+	if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
 		if (!in_interrupt()) {
 			cond_resched();
 		}
@@ -244,7 +244,30 @@ enum xpc_retval {
 
 	xpcDisconnected,	/* 51: channel disconnected (closed) */
 
-	xpcUnknownReason	/* 52: unknown reason -- must be last in list */
+	xpcBteSh2Start,		/* 52: base of the shub2 BTE error range */
+
+				/* 53: 0x1 BTE Error Response Short */
+	xpcBteSh2RspShort = xpcBteSh2Start + BTEFAIL_SH2_RESP_SHORT,
+
+				/* 54: 0x2 BTE Error Response Long */
+	xpcBteSh2RspLong = xpcBteSh2Start + BTEFAIL_SH2_RESP_LONG,
+
+				/* 56: 0x4 BTE Error Response DSB */
+	xpcBteSh2RspDSB = xpcBteSh2Start + BTEFAIL_SH2_RESP_DSP,
+
+				/* 60: 0x8 BTE Error Response Access */
+	xpcBteSh2RspAccess = xpcBteSh2Start + BTEFAIL_SH2_RESP_ACCESS,
+
+				/* 68: 0x10 BTE Error CRB timeout */
+	xpcBteSh2CRBTO = xpcBteSh2Start + BTEFAIL_SH2_CRB_TO,
+
+				/* 84: 0x20 BTE Error NACK limit */
+	xpcBteSh2NACKLimit = xpcBteSh2Start + BTEFAIL_SH2_NACK_LIMIT,
+
+				/* 115: BTE end */
+	xpcBteSh2End = xpcBteSh2Start + BTEFAIL_SH2_ALL,
+
+	xpcUnknownReason	/* 116: unknown reason -- must be last in list */
 };
 
 
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h
index 23ee0cf..5ff4df9 100644
--- a/include/asm-ia64/sn/xpc.h
+++ b/include/asm-ia64/sn/xpc.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -1211,6 +1211,14 @@ xpc_IPI_init(int index)
 static inline enum xpc_retval
 xpc_map_bte_errors(bte_result_t error)
 {
+	if (error == BTE_SUCCESS)
+		return xpcSuccess;
+
+	if (is_shub2()) {
+		if (BTE_VALID_SH2_ERROR(error))
+			return xpcBteSh2Start + error;
+		return xpcBteUnmappedError;
+	}
 	switch (error) {
 	case BTE_SUCCESS:	return xpcSuccess;
 	case BTEFAIL_DIR:	return xpcBteDirectoryError;