Regression in 8.2-STABLE bge code (from 7.4-STABLE)

YongHyeon PYUN pyunyh at gmail.com
Sat Feb 25 05:13:08 UTC 2012


On Thu, Feb 23, 2012 at 09:46:20AM -0500, John Baldwin wrote:
> On Tuesday, February 14, 2012 7:56:00 pm YongHyeon PYUN wrote:
> > On Sat, Jan 28, 2012 at 09:24:53PM -0500, Michael L. Squires wrote:
> > 
> > Sorry for late reply.  Had been busy due to relocation.
> > 
> > > There is a bug in the Tyan S4881/S4882 PCI-X bridges that was fixed with a 
> > > patch in 7.x (thank you very much).  This patch is not present in the 
> > > 8.2-STABLE code and the symptoms (watchdog timeouts) have recurred.
> > > 
> > 
> > Hmm, I thought the mailbox reordering bug was avoided by limiting
> > DMA address space to 32bits but it seems it was not right workaround
> > for AMD 8131 PCI-X Bridge.
> > 
> > > The watchdog timeouts do not appear to be present after I switched to an 
> > > Intel gigabit PCI-X card.
> > > 
> > > I did a brute-force patch of the 8.2-STABLE bge code using the patches for
> > > 7.4-STABLE; the resulting code compiled and, other than odd behavior at
> > > startup, seems to be working normally.
> > > 
> > > This is using FreeBSD 8.2-STABLE amd64; I don't know what happens with 
> > > i386.
> > > 
> > > Given the age of the boards it may be easier if I just continue using the
> > > Intel gigabit card but am happy to test anything that comes my way.
> > > 
> > 
> > Try attached patch and let me know how it goes.
> > I didn't enable 64bit DMA addressing though. I think the AMD-8131
> > PCI-X bridge needs both workarounds.
> 
> Eh, please don't do the thing where you walk all pcib devices.  Instead, walk 
> up the tree like so:
> 
> static int
> bge_mbox_reorder(struct bge_softc *sc)
> {
> 	devclass_t pcib, pci;
> 	device_t dev, bus;
> 
> 	pci = devclass_find("pci");
> 	pcib = devclass_find("pcib");
> 	dev = sc->dev;
> 	bus = device_get_parent(dev);
> 	for (;;) {
> 		dev = device_get_parent(bus);
> 		bus = device_get_parent(dev);
> 		if (device_get_devclass(dev) != pcib_devclass ||
> 		    device_get_devclass(bus) != pci_devclass)
> 			break;
> 		/* Probe device ID. */
> 	}
> 	return (0);
> }
> 
> It is not safe to use pci_get_vendor() with non-PCI devices (you may get
> random junk, and Host-PCI bridges are not PCI devices).  Also, this will only
> apply the quirk if a relevant bridge is in the bge device's path.
> 

Thanks for reviewing and suggestion.
Would you review updated one?
-------------- next part --------------
Index: sys/dev/bge/if_bgereg.h
===================================================================
--- sys/dev/bge/if_bgereg.h	(revision 232144)
+++ sys/dev/bge/if_bgereg.h	(working copy)
@@ -2828,6 +2828,7 @@
 #define	BGE_FLAG_RX_ALIGNBUG	0x04000000
 #define	BGE_FLAG_SHORT_DMA_BUG	0x08000000
 #define	BGE_FLAG_4K_RDMA_BUG	0x10000000
+#define	BGE_FLAG_MBOX_REORDER	0x20000000
 	uint32_t		bge_phy_flags;
 #define	BGE_PHY_NO_WIRESPEED	0x00000001
 #define	BGE_PHY_ADC_BUG		0x00000002
Index: sys/dev/bge/if_bge.c
===================================================================
--- sys/dev/bge/if_bge.c	(revision 232144)
+++ sys/dev/bge/if_bge.c	(working copy)
@@ -380,6 +380,8 @@
 static int bge_dma_ring_alloc(struct bge_softc *, bus_size_t, bus_size_t,
     bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *);
 
+static int bge_mbox_reorder(struct bge_softc *);
+
 static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]);
 static int bge_get_eaddr_mem(struct bge_softc *, uint8_t[]);
 static int bge_get_eaddr_nvram(struct bge_softc *, uint8_t[]);
@@ -635,6 +637,8 @@
 		off += BGE_LPMBX_IRQ0_HI - BGE_MBX_IRQ0_HI;
 
 	CSR_WRITE_4(sc, off, val);
+	if ((sc->bge_flags & BGE_FLAG_MBOX_REORDER) != 0)
+		CSR_READ_4(sc, off);
 }
 
 /*
@@ -2609,8 +2613,8 @@
 		 * XXX
 		 * watchdog timeout issue was observed on BCM5704 which
 		 * lives behind PCI-X bridge(e.g AMD 8131 PCI-X bridge).
-		 * Limiting DMA address space to 32bits seems to address
-		 * it.
+		 * Both limiting DMA address space to 32bits and flushing
+		 * mailbox write seem to address the issue.
 		 */
 		if (sc->bge_flags & BGE_FLAG_PCIX)
 			lowaddr = BUS_SPACE_MAXADDR_32BIT;
@@ -2775,6 +2779,47 @@
 }
 
 static int
+bge_mbox_reorder(struct bge_softc *sc)
+{
+	/* Lists of PCI bridges that are known to reorder mailbox writes. */
+	static const struct mbox_reorder {
+		const uint16_t vendor;
+		const uint16_t device;
+		const char *desc;
+	} const mbox_reorder_lists[] = {
+		{ 0x1022, 0x7450, "AMD-8131 PCI-X Bridge" },
+	};
+	devclass_t pci, pcib;
+	device_t bus, dev;
+	int count, i;
+
+	count = sizeof(mbox_reorder_lists) / sizeof(mbox_reorder_lists[0]);
+	pci = devclass_find("pci");
+	pcib = devclass_find("pcib");
+	dev = sc->bge_dev;
+	bus = device_get_parent(dev);
+	for (;;) {
+		dev = device_get_parent(bus);
+		bus = device_get_parent(dev);
+		if (device_get_devclass(dev) != pcib ||
+		    device_get_devclass(bus) != pci)
+			break;
+		for (i = 0; i < count; i++) {
+			if (pci_get_vendor(dev) ==
+			    mbox_reorder_lists[i].vendor &&
+			    pci_get_device(dev) ==
+			    mbox_reorder_lists[i].device) {
+				device_printf(sc->bge_dev,
+				    "enabling MBOX workaround for %s\n",
+				    mbox_reorder_lists[i].desc);
+				return (1);
+			}
+		}
+	}
+	return (0);
+}
+
+static int
 bge_attach(device_t dev)
 {
 	struct ifnet *ifp;
@@ -3094,6 +3139,14 @@
 	if (BGE_IS_5714_FAMILY(sc) && (sc->bge_flags & BGE_FLAG_PCIX))
 		sc->bge_flags |= BGE_FLAG_40BIT_BUG;
 	/*
+	 * Some PCI-X bridges are known to trigger write reordering to
+	 * the mailbox registers. Typical phenomena is watchdog timeouts
+	 * caused by out-of-order TX completions.  Enable workaround for
+	 * PCI-X devices that live behind these bridges.
+	 */
+	if (sc->bge_flags & BGE_FLAG_PCIX && bge_mbox_reorder(sc) != 0)
+		sc->bge_flags |= BGE_FLAG_MBOX_REORDER;
+	/*
 	 * Allocate the interrupt, using MSI if possible.  These devices
 	 * support 8 MSI messages, but only the first one is used in
 	 * normal operation.


More information about the freebsd-stable mailing list