Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 1560

kernel-2.6.18-128.1.10.el5.src.rpm

From: Neil Horman <nhorman@redhat.com>
Date: Wed, 8 Oct 2008 20:31:57 -0400
Subject: [net] sky2: fix hang resulting from link flap
Message-id: 20081009003157.GA26552@hmsendeavour.rdu.redhat.com
O-Subject: Re: [RHEL 5.3 PATCH] sky2: fix hang resulting from link flap (bz 461681)
Bugzilla: 461681

On Wed, Oct 08, 2008 at 05:52:42PM -0400, Don Zickus wrote:
> On Tue, Oct 07, 2008 at 10:47:56AM -0400, Neil Horman wrote:
> > Hey all-
> > 	This patch is a backport of upstream commits:
> > 75e806838a3327d4ca9030e588d34de11b04f341
> > 32c2c30085324aef9699934295281cca0161ef7e
> >
> > They fix a sky2 hardware bug that results in a hang on a full rx fifo, which was
> > causing the hang reported in bz 461681.  Tested and verified by the reporter.
> >
> > Regards
> > Neil
>

Its exactly the same as the posted version, but it includes the missing definition from
upstream (which includes normalizing the definition name to SKY2_HW_RAM_BUFFER,
and the flags field in the private data to hold those bits).

Neil

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 5fe8477..4a7d3d9 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -96,10 +96,6 @@ static int disable_msi = 0;
 module_param(disable_msi, int, 0);
 MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
-static int idle_timeout = 0;
-module_param(idle_timeout, int, 0);
-MODULE_PARM_DESC(idle_timeout, "Watchdog timer for lost interrupts (ms)");
-
 static const struct pci_device_id sky2_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, /* SK-9Sxx */
 	{ PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx */
@@ -1266,6 +1262,8 @@ static int sky2_up(struct net_device *dev)
 	if (ramsize > 0) {
 		u32 rxspace;
 
+		hw->flags |= SKY2_HW_RAM_BUFFER;
+		pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize);
 		if (ramsize < 16)
 			rxspace = ramsize / 2;
 		else
@@ -1692,6 +1690,8 @@ static void sky2_link_up(struct sky2_port *sky2)
 	netif_carrier_on(sky2->netdev);
 	netif_wake_queue(sky2->netdev);
 
+	mod_timer(&hw->watchdog_timer, jiffies + 1);
+
 	/* Turn on link LED */
 	sky2_write8(hw, SK_REG(port, LNK_LED_REG),
 		    LINKLED_ON | LINKLED_BLINK_OFF | LINKLED_LINKSYNC_OFF);
@@ -2367,25 +2367,72 @@ static void sky2_le_error(struct sky2_hw *hw, unsigned port,
 	sky2_write32(hw, Q_ADDR(q, Q_CSR), BMU_CLR_IRQ_CHK);
 }
 
-/* If idle then force a fake soft NAPI poll once a second
- * to work around cases where sharing an edge triggered interrupt.
- */
-static inline void sky2_idle_start(struct sky2_hw *hw)
+static int sky2_rx_hung(struct net_device *dev)
 {
-	if (idle_timeout > 0)
-		mod_timer(&hw->idle_timer,
-			  jiffies + msecs_to_jiffies(idle_timeout));
+	struct sky2_port *sky2 = netdev_priv(dev);
+	struct sky2_hw *hw = sky2->hw;
+	unsigned port = sky2->port;
+	unsigned rxq = rxqaddr[port];
+	u32 mac_rp = sky2_read32(hw, SK_REG(port, RX_GMF_RP));
+	u8 mac_lev = sky2_read8(hw, SK_REG(port, RX_GMF_RLEV));
+	u8 fifo_rp = sky2_read8(hw, Q_ADDR(rxq, Q_RP));
+	u8 fifo_lev = sky2_read8(hw, Q_ADDR(rxq, Q_RL));
+
+	/* If idle and MAC or PCI is stuck */
+	if (sky2->check.last == dev->last_rx &&
+	    ((mac_rp == sky2->check.mac_rp &&
+	      mac_lev != 0 && mac_lev >= sky2->check.mac_lev) ||
+	     /* Check if the PCI RX hang */
+	     (fifo_rp == sky2->check.fifo_rp &&
+	      fifo_lev != 0 && fifo_lev >= sky2->check.fifo_lev))) {
+		printk(KERN_DEBUG PFX "%s: hung mac %d:%d fifo %d (%d:%d)\n",
+		       dev->name, mac_lev, mac_rp, fifo_lev, fifo_rp,
+		       sky2_read8(hw, Q_ADDR(rxq, Q_WP)));
+		return 1;
+	} else {
+		sky2->check.last = dev->last_rx;
+		sky2->check.mac_rp = mac_rp;
+		sky2->check.mac_lev = mac_lev;
+		sky2->check.fifo_rp = fifo_rp;
+		sky2->check.fifo_lev = fifo_lev;
+		return 0;
+	}
 }
 
-static void sky2_idle(unsigned long arg)
+static void sky2_watchdog(unsigned long arg)
 {
 	struct sky2_hw *hw = (struct sky2_hw *) arg;
-	struct net_device *dev = hw->dev[0];
+	struct net_device *dev;
 
-	if (__netif_rx_schedule_prep(dev))
-		__netif_rx_schedule(dev);
+	/* Check for lost IRQ once a second */
+	if (sky2_read32(hw, B0_ISRC)) {
+		dev = hw->dev[0];
+		if (__netif_rx_schedule_prep(dev))
+			__netif_rx_schedule(dev);
+	} else {
+		int i, active = 0;
+
+		for (i = 0; i < hw->ports; i++) {
+			dev = hw->dev[i];
+			if (!netif_running(dev))
+				continue;
+			++active;
+
+			/* For chips with Rx FIFO, check if stuck */
+			if ((hw->flags & SKY2_HW_RAM_BUFFER) &&
+			     sky2_rx_hung(dev)) {
+				pr_info(PFX "%s: receiver hang detected\n",
+					dev->name);
+				schedule_work(&hw->restart_work);
+				return;
+			}
+		}
+
+		if (active == 0)
+			return;
+	}
 
-	mod_timer(&hw->idle_timer, jiffies + msecs_to_jiffies(idle_timeout));
+	mod_timer(&hw->watchdog_timer, round_jiffies(jiffies + HZ));
 }
 
 /* Hardware/software error handling */
@@ -2668,8 +2715,6 @@ static void sky2_restart(void *d)
 
 	dev_dbg(&hw->pdev->dev, "restarting\n");
 
-	del_timer_sync(&hw->idle_timer);
-
 	rtnl_lock();
 	sky2_write32(hw, B0_IMSK, 0);
 	sky2_read32(hw, B0_IMSK);
@@ -2698,8 +2743,6 @@ static void sky2_restart(void *d)
 		}
 	}
 
-	sky2_idle_start(hw);
-
 	rtnl_unlock();
 }
 
@@ -3706,10 +3749,9 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 			sky2_show_addr(dev1);
 	}
 
-	setup_timer(&hw->idle_timer, sky2_idle, (unsigned long) hw);
-	INIT_WORK(&hw->restart_work, sky2_restart, hw);
+	setup_timer(&hw->watchdog_timer, sky2_watchdog, (unsigned long) hw);
 
-	sky2_idle_start(hw);
+	INIT_WORK(&hw->restart_work, sky2_restart, hw);
 
 	pci_set_drvdata(pdev, hw);
 
@@ -3744,7 +3786,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev)
 	if (!hw)
 		return;
 
-	del_timer_sync(&hw->idle_timer);
+	del_timer_sync(&hw->watchdog_timer);
 
 	flush_scheduled_work();
 
@@ -3785,7 +3827,6 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct sky2_hw *hw = pci_get_drvdata(pdev);
 	int i, wol = 0;
 
-	del_timer_sync(&hw->idle_timer);
 	netif_poll_disable(hw->dev[0]);
 
 	for (i = 0; i < hw->ports; i++) {
@@ -3848,7 +3889,7 @@ static int sky2_resume(struct pci_dev *pdev)
 	}
 
 	netif_poll_enable(hw->dev[0]);
-	sky2_idle_start(hw);
+
 	return 0;
 out:
 	dev_err(&pdev->dev, "resume failed (%d)\n", err);
@@ -3862,7 +3903,6 @@ static void sky2_shutdown(struct pci_dev *pdev)
 	struct sky2_hw *hw = pci_get_drvdata(pdev);
 	int i, wol = 0;
 
-	del_timer_sync(&hw->idle_timer);
 	netif_poll_disable(hw->dev[0]);
 
 	for (i = 0; i < hw->ports; i++) {
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h
index 5efb5af..6906b81 100644
--- a/drivers/net/sky2.h
+++ b/drivers/net/sky2.h
@@ -1913,6 +1913,14 @@ struct sky2_port {
 	u16		     rx_tag;
 	struct vlan_group    *vlgrp;
 #endif
+	struct {
+		unsigned long last;
+		u32	mac_rp;
+		u8	mac_lev;
+		u8	fifo_rp;
+		u8	fifo_lev;
+	} check;
+
 
 	dma_addr_t	     rx_le_map;
 	dma_addr_t	     tx_le_map;
@@ -1933,6 +1941,15 @@ struct sky2_hw {
 	void __iomem  	     *regs;
 	struct pci_dev	     *pdev;
 	struct net_device    *dev[2];
+	unsigned long        flags;
+#define SKY2_HW_USE_MSI         0x00000001
+#define SKY2_HW_FIBRE_PHY       0x00000002
+#define SKY2_HW_GIGABIT         0x00000004
+#define SKY2_HW_NEWER_PHY       0x00000008
+#define SKY2_HW_RAM_BUFFER      0x00000010
+#define SKY2_HW_NEW_LE          0x00000020	/* new LSOv2 format */
+#define SKY2_HW_AUTO_TX_SUM     0x00000040	/* new IP decode for Tx */
+#define SKY2_HW_ADV_POWER_CTL   0x00000080	/* additional PHY power regs */
 
 	u8	     	     chip_id;
 	u8		     chip_rev;
@@ -1943,7 +1960,7 @@ struct sky2_hw {
 	u32		     st_idx;
 	dma_addr_t   	     st_dma;
 
-	struct timer_list    idle_timer;
+	struct timer_list    watchdog_timer;
 	struct work_struct   restart_work;
 	int		     msi;
 	wait_queue_head_t    msi_wait;