svn commit: r295618 - head/sys/dev/ntb/ntb_hw

Conrad E. Meyer cem at FreeBSD.org
Sun Feb 14 22:37:30 UTC 2016


Author: cem
Date: Sun Feb 14 22:37:28 2016
New Revision: 295618
URL: https://svnweb.freebsd.org/changeset/base/295618

Log:
  NTB: workaround for high traffic hardware hang
  
  This patch comes from Dave Jiang's Linux tree, davejiang/ntb.  It hasn't
  been accepted into Linus' tree, so I do not have an authoritative SHA1
  to point at.  Original commit log:
  
  =====================================================================
  A hardware errata causes the NTB to hang when there is heavy
  bi-directional traffic in addition to the usage of BAR0/1 (where the
  registers reside, including the doorbell registers to trigger interrupts).
  
  This workaround is only available on Haswell and Broadwell platforms.
  The workaround is to enable split BAR in the BIOS to allow the 64bit
  BAR4 to be split into two 32bit BARs, BAR4 and BAR5. BAR4 shall be pointed
  to the LAPIC region of the remote host. We will bypass the db mechanism and
  directly trigger the MSIX interrupts. The offsets and vectors are
  exchanged during transport scratch pad negotiation. The scratch pads are
  now overloaded in order to allow the exchange of the information. This
  gets around using the doorbell and prevents the lockup with additional
  pcode changes in BIOS.
  
  Signed-off-by:	Dave Jiang <dave.jiang at intel.com>
  =====================================================================
  
  Notable changes in the FreeBSD version of this patch:
  * The MSIX BAR is configurable via the hw.ntb.msix_mw_idx tunable
    (analogous to hw.ntb.b2b_mw_idx).  The Linux version of the patch
    only uses BAR4.
  * MSIX negotiation aborts if the link goes down.
  
  Obtained from:	Linux (Dual BSD/GPL driver)
  Sponsored by:	EMC / Isilon Storage Division

Modified:
  head/sys/dev/ntb/ntb_hw/ntb_hw.c
  head/sys/dev/ntb/ntb_hw/ntb_hw.h
  head/sys/dev/ntb/ntb_hw/ntb_regs.h

Modified: head/sys/dev/ntb/ntb_hw/ntb_hw.c
==============================================================================
--- head/sys/dev/ntb/ntb_hw/ntb_hw.c	Sun Feb 14 22:31:38 2016	(r295617)
+++ head/sys/dev/ntb/ntb_hw/ntb_hw.c	Sun Feb 14 22:37:28 2016	(r295618)
@@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/endian.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pciio.h>
 #include <sys/queue.h>
 #include <sys/rman.h>
 #include <sys/sbuf.h>
@@ -42,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/bus.h>
+#include <machine/intr_machdep.h>
 #include <machine/pmap.h>
 #include <machine/resource.h>
 #include <dev/pci/pcireg.h>
@@ -70,6 +73,19 @@ __FBSDID("$FreeBSD$");
 
 #define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
 
+#define	NTB_MSIX_VER_GUARD	0xaabbccdd
+#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
+#define	ONE_MB			(1024u * 1024)
+
+/*
+ * PCI constants could be somewhere more generic, but aren't defined/used in
+ * pci.c.
+ */
+#define	PCI_MSIX_ENTRY_SIZE		16
+#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
+#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
+#define	PCI_MSIX_ENTRY_DATA		8
+
 enum ntb_device_type {
 	NTB_XEON,
 	NTB_ATOM
@@ -95,6 +111,18 @@ enum ntb_bar {
 	NTB_MAX_BARS
 };
 
+enum {
+	NTB_MSIX_GUARD = 0,
+	NTB_MSIX_DATA0,
+	NTB_MSIX_DATA1,
+	NTB_MSIX_DATA2,
+	NTB_MSIX_OFS0,
+	NTB_MSIX_OFS1,
+	NTB_MSIX_OFS2,
+	NTB_MSIX_DONE,
+	NTB_MAX_MSIX_SPAD
+};
+
 /* Device features and workarounds */
 #define HAS_FEATURE(feature)	\
 	((ntb->features & (feature)) != 0)
@@ -131,6 +159,7 @@ struct ntb_int_info {
 struct ntb_vec {
 	struct ntb_softc	*ntb;
 	uint32_t		num;
+	unsigned		masked;
 };
 
 struct ntb_reg {
@@ -169,6 +198,11 @@ struct ntb_b2b_addr {
 	uint64_t	bar5_addr32;
 };
 
+struct ntb_msix_data {
+	uint32_t	nmd_ofs;
+	uint32_t	nmd_data;
+};
+
 struct ntb_softc {
 	device_t		device;
 	enum ntb_device_type	type;
@@ -178,6 +212,13 @@ struct ntb_softc {
 	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
 	uint32_t		allocated_interrupts;
 
+	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	bool			peer_msix_good;
+	bool			peer_msix_done;
+	struct ntb_pci_bar_info	*peer_lapic_bar;
+	struct callout		peer_msix_work;
+
 	struct callout		heartbeat_timer;
 	struct callout		lr_timer;
 
@@ -198,6 +239,7 @@ struct ntb_softc {
 	/* Memory window used to access peer bar0 */
 #define B2B_MW_DISABLED			UINT8_MAX
 	uint8_t				b2b_mw_idx;
+	uint8_t				msix_mw_idx;
 
 	uint8_t				mw_count;
 	uint8_t				spad_count;
@@ -292,6 +334,8 @@ static inline void db_iowrite(struct ntb
 static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
 static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
 static void ntb_free_msix_vec(struct ntb_softc *ntb);
+static void ntb_get_msix_info(struct ntb_softc *ntb, uint32_t num_vectors);
+static void ntb_exchange_msix(void *);
 static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
 static void ntb_detect_max_mw(struct ntb_softc *ntb);
 static int ntb_detect_xeon(struct ntb_softc *ntb);
@@ -308,7 +352,9 @@ static void xeon_set_pbar_xlat(struct nt
     enum ntb_bar idx);
 static int xeon_setup_b2b_mw(struct ntb_softc *,
     const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
+static int xeon_setup_msix_bar(struct ntb_softc *);
 static inline bool link_is_up(struct ntb_softc *ntb);
+static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
 static inline bool atom_link_is_err(struct ntb_softc *ntb);
 static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
 static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
@@ -395,6 +441,13 @@ ntb_vm_memattr_to_str(vm_memattr_t pat)
 	}
 }
 
+static int g_ntb_msix_idx = 0;
+SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
+    0, "Use this memory window to access the peer MSIX message complex on "
+    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
+    "Like b2b_mw_idx, negative values index from the last available memory "
+    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
+
 static int g_ntb_mw_idx = -1;
 SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
     0, "Use this memory window to access the peer NTB registers.  A "
@@ -593,10 +646,12 @@ ntb_attach(device_t device)
 	ntb->type = p->type;
 	ntb->features = p->features;
 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
+	ntb->msix_mw_idx = B2B_MW_DISABLED;
 
 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
 	callout_init(&ntb->heartbeat_timer, 1);
 	callout_init(&ntb->lr_timer, 1);
+	callout_init(&ntb->peer_msix_work, 1);
 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
 	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
 
@@ -621,6 +676,8 @@ ntb_attach(device_t device)
 	if (error != 0)
 		goto out;
 
+	ntb_spad_clear(ntb);
+
 	ntb_poll_link(ntb);
 
 	ntb_sysctl_init(ntb);
@@ -638,10 +695,14 @@ ntb_detach(device_t device)
 
 	ntb = DEVICE2SOFTC(device);
 
-	if (ntb->self_reg != NULL)
-		ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	if (ntb->self_reg != NULL) {
+		DB_MASK_LOCK(ntb);
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
+		DB_MASK_UNLOCK(ntb);
+	}
 	callout_drain(&ntb->heartbeat_timer);
 	callout_drain(&ntb->lr_timer);
+	callout_drain(&ntb->peer_msix_work);
 	pci_disable_busmaster(ntb->device);
 	if (ntb->type == NTB_XEON)
 		ntb_teardown_xeon(ntb);
@@ -965,9 +1026,12 @@ ntb_init_isr(struct ntb_softc *ntb)
 	ntb->last_ts = ticks;
 
 	/*
-	 * Mask all doorbell interrupts.
+	 * Mask all doorbell interrupts.  (Except link events!)
 	 */
-	ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
 
 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
 	    ntb->db_count);
@@ -992,12 +1056,20 @@ ntb_init_isr(struct ntb_softc *ntb)
 		num_vectors = 1;
 
 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
+		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+			device_printf(ntb->device,
+			    "Errata workaround does not support MSI or INTX\n");
+			return (EINVAL);
+		}
+
 		ntb->db_vec_count = 1;
 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
 		rc = ntb_setup_legacy_interrupt(ntb);
 	} else {
 		ntb_create_msix_vec(ntb, num_vectors);
 		rc = ntb_setup_msix(ntb, num_vectors);
+		if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+			ntb_get_msix_info(ntb, num_vectors);
 	}
 	if (rc != 0) {
 		device_printf(ntb->device,
@@ -1103,6 +1175,9 @@ void
 ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return;
+
 	DB_MASK_LOCK(ntb);
 	ntb->db_mask |= bits;
 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
@@ -1118,6 +1193,9 @@ ntb_db_clear_mask(struct ntb_softc *ntb,
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return;
+
 	DB_MASK_LOCK(ntb);
 	ntb->db_mask &= ~bits;
 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
@@ -1128,6 +1206,18 @@ uint64_t
 ntb_db_read(struct ntb_softc *ntb)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		uint64_t res;
+		unsigned i;
+
+		res = 0;
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if (ntb->msix_vec[i].masked != 0)
+				res |= ntb_db_vector_mask(ntb, i);
+		}
+		return (res);
+	}
+
 	return (db_ioread(ntb, ntb->self_reg->db_bell));
 }
 
@@ -1140,6 +1230,25 @@ ntb_db_clear(struct ntb_softc *ntb, uint
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		unsigned i;
+
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
+				DB_MASK_LOCK(ntb);
+				if (ntb->msix_vec[i].masked != 0) {
+					/* XXX These need a public API. */
+#if 0
+					pci_unmask_msix(ntb->device, i);
+#endif
+					ntb->msix_vec[i].masked = 0;
+				}
+				DB_MASK_UNLOCK(ntb);
+			}
+		}
+		return;
+	}
+
 	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
 }
 
@@ -1166,6 +1275,19 @@ ntb_interrupt(struct ntb_softc *ntb, uin
 			ntb_link_event(ntb);
 	}
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    (vec_mask & ntb->db_link_mask) == 0) {
+		DB_MASK_LOCK(ntb);
+		if (ntb->msix_vec[vec].masked == 0) {
+			/* XXX These need a public API. */
+#if 0
+			pci_mask_msix(ntb->device, vec);
+#endif
+			ntb->msix_vec[vec].masked = 1;
+		}
+		DB_MASK_UNLOCK(ntb);
+	}
+
 	if ((vec_mask & ntb->db_valid_mask) != 0)
 		ntb_db_event(ntb, vec);
 }
@@ -1211,6 +1333,38 @@ ntb_free_msix_vec(struct ntb_softc *ntb)
 	ntb->msix_vec = NULL;
 }
 
+static void
+ntb_get_msix_info(struct ntb_softc *ntb, uint32_t num_vectors)
+{
+	struct pci_devinfo *dinfo;
+	struct pcicfg_msix *msix;
+	uint32_t laddr, data, i, offset;
+
+	dinfo = device_get_ivars(ntb->device);
+	msix = &dinfo->cfg.msix;
+
+	laddr = data = 0;
+
+	for (i = 0; i < num_vectors; i++) {
+		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
+
+		laddr = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_LOWER_ADDR);
+		ntb_printf(2, "local lower MSIX addr(%u): 0x%x\n", i, laddr);
+
+		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
+		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
+		     MSI_INTEL_ADDR_BASE));
+		ntb->msix_data[i].nmd_ofs = laddr & ~MSI_INTEL_ADDR_BASE;
+
+		data = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_DATA);
+		ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+
+		ntb->msix_data[i].nmd_data = data;
+	}
+}
+
 static struct ntb_hw_info *
 ntb_get_device_info(uint32_t device_id)
 {
@@ -1263,9 +1417,12 @@ ntb_detect_xeon(struct ntb_softc *ntb)
 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
 		ntb->features |= NTB_SPLIT_BAR;
 
-	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
+	/*
+	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
+	 * errata workaround; only do one at a time.
+	 */
 	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-		ntb->features |= NTB_SDOORBELL_LOCKUP;
+		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
 
 	conn_type = ppd & XEON_PPD_CONN_TYPE;
 	switch (conn_type) {
@@ -1329,19 +1486,28 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	ntb->peer_reg = &xeon_b2b_reg;
 	ntb->xlat_reg = &xeon_sec_xlat;
 
-	/*
-	 * There is a Xeon hardware errata related to writes to SDOORBELL or
-	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
-	 * which may hang the system.  To workaround this, use a memory
-	 * window to access the interrupt and scratch pad registers on the
-	 * remote system.
-	 */
-	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
+		    ntb->mw_count;
+		ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+		    g_ntb_msix_idx, ntb->msix_mw_idx);
+		rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
+		KASSERT(rc == 0, ("shouldn't fail"));
+	} else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+		/*
+		 * There is a Xeon hardware errata related to writes to SDOORBELL or
+		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
+		 * which may hang the system.  To workaround this, use a memory
+		 * window to access the interrupt and scratch pad registers on the
+		 * remote system.
+		 */
 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
 		    ntb->mw_count;
 		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
-		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE);
+		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
 		KASSERT(rc == 0, ("shouldn't fail"));
 	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
 		/*
@@ -1372,7 +1538,14 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	/*
 	 * Mask all doorbell interrupts.
 	 */
-	ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
+
+	rc = xeon_setup_msix_bar(ntb);
+	if (rc != 0)
+		return (rc);
 
 	rc = ntb_init_isr(ntb);
 	return (rc);
@@ -1475,6 +1648,15 @@ xeon_reset_sbar_size(struct ntb_softc *n
 			bar_sz--;
 		else
 			bar_sz = 0;
+	} else if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx) {
+		/* Restrict LAPIC BAR to 1MB */
+		pci_write_config(ntb->device, bar->psz_off, 20, 1);
+		pci_write_config(ntb->device, bar->ssz_off, 20, 1);
+		bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
+		bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
+		(void)bar_sz;
+		return;
 	}
 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
@@ -1485,28 +1667,37 @@ static void
 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
     enum ntb_bar idx, enum ntb_bar regbar)
 {
-	uint64_t reg_val;
+	uint64_t reg_val, lmt_addr;
 	uint32_t base_reg, lmt_reg;
 
 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
 	if (idx == regbar)
 		bar_addr += ntb->b2b_off;
+	lmt_addr = bar_addr;
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx)
+		lmt_addr += ONE_MB;
+
+	/*
+	 * Set limit registers first to avoid an errata where setting the base
+	 * registers locks the limit registers.
+	 */
 	if (!bar_is_64bit(ntb, idx)) {
-		ntb_reg_write(4, base_reg, bar_addr);
-		reg_val = ntb_reg_read(4, base_reg);
+		ntb_reg_write(4, lmt_reg, lmt_addr);
+		reg_val = ntb_reg_read(4, lmt_reg);
 		(void)reg_val;
 
-		ntb_reg_write(4, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(4, lmt_reg);
+		ntb_reg_write(4, base_reg, bar_addr);
+		reg_val = ntb_reg_read(4, base_reg);
 		(void)reg_val;
 	} else {
-		ntb_reg_write(8, base_reg, bar_addr);
-		reg_val = ntb_reg_read(8, base_reg);
+		ntb_reg_write(8, lmt_reg, lmt_addr);
+		reg_val = ntb_reg_read(8, lmt_reg);
 		(void)reg_val;
 
-		ntb_reg_write(8, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(8, lmt_reg);
+		ntb_reg_write(8, base_reg, bar_addr);
+		reg_val = ntb_reg_read(8, base_reg);
 		(void)reg_val;
 	}
 }
@@ -1528,6 +1719,37 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb
 }
 
 static int
+xeon_setup_msix_bar(struct ntb_softc *ntb)
+{
+	struct ntb_pci_bar_info *lapic_bar;
+	enum ntb_bar bar_num;
+	int rc;
+
+	if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return (0);
+
+	bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+	lapic_bar = &ntb->bar_info[bar_num];
+
+	/* Restrict LAPIC BAR to 1MB */
+	if (lapic_bar->size > ONE_MB) {
+		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
+		    lapic_bar->pci_resource, lapic_bar->pbase,
+		    lapic_bar->pbase + ONE_MB - 1);
+		if (rc == 0)
+			lapic_bar->size = ONE_MB;
+		else {
+			ntb_printf(0, "Failed to shrink LAPIC BAR resource to "
+			    "1 MB: %d\n", rc);
+			/* Ignore error */
+		}
+	}
+
+	ntb->peer_lapic_bar = lapic_bar;
+	return (0);
+}
+
+static int
 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
     const struct ntb_b2b_addr *peer_addr)
 {
@@ -1605,6 +1827,43 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb,
 	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
 	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		size_t size, xlatoffset;
+
+		switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
+		case NTB_B2B_BAR_1:
+			size = 8;
+			xlatoffset = XEON_SBAR2XLAT_OFFSET;
+			break;
+		case NTB_B2B_BAR_2:
+			xlatoffset = XEON_SBAR4XLAT_OFFSET;
+			if (HAS_FEATURE(NTB_SPLIT_BAR))
+				size = 4;
+			else
+				size = 8;
+			break;
+		case NTB_B2B_BAR_3:
+			xlatoffset = XEON_SBAR5XLAT_OFFSET;
+			size = 4;
+			break;
+		default:
+			KASSERT(false, ("Bogus msix mw idx: %u",
+			    ntb->msix_mw_idx));
+			return (EINVAL);
+		}
+
+		/*
+		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
+		 * workaround
+		 */
+		if (size == 4)
+			ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
+		else
+			ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
+	}
+	(void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+	(void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+
 	/* Zero outgoing translation limits (whole bar size windows) */
 	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
 	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
@@ -1642,14 +1901,21 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb,
 }
 
 static inline bool
+_xeon_link_is_up(struct ntb_softc *ntb)
+{
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
+		return (true);
+	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
+}
+
+static inline bool
 link_is_up(struct ntb_softc *ntb)
 {
 
-	if (ntb->type == NTB_XEON) {
-		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
-			return (true);
-		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
-	}
+	if (ntb->type == NTB_XEON)
+		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
+		    !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
 
 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
@@ -1988,6 +2254,19 @@ ntb_poll_link(struct ntb_softc *ntb)
 			return (false);
 
 		ntb->lnk_sta = reg_val;
+
+		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+			if (_xeon_link_is_up(ntb)) {
+				if (!ntb->peer_msix_good) {
+					callout_reset(&ntb->peer_msix_work, 0,
+					    ntb_exchange_msix, ntb);
+					return (false);
+				}
+			} else {
+				ntb->peer_msix_good = false;
+				ntb->peer_msix_done = false;
+			}
+		}
 	}
 	return (true);
 }
@@ -2416,12 +2695,70 @@ static unsigned
 ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
 {
 
-	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
-	    uidx >= ntb->b2b_mw_idx)
-		return (uidx + 1);
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) ||
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) &&
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
 	return (uidx);
 }
 
+static void
+ntb_exchange_msix(void *ctx)
+{
+	struct ntb_softc *ntb;
+	uint32_t val;
+	unsigned i;
+
+	ntb = ctx;
+
+	if (ntb->peer_msix_done)
+		goto msix_done;
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
+		    ntb->msix_data[i].nmd_data);
+		ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
+		    ntb->msix_data[i].nmd_ofs);
+	}
+	ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+
+	ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
+	if (val != NTB_MSIX_VER_GUARD)
+		goto reschedule;
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
+		ntb->peer_msix_data[i].nmd_data = val;
+		ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
+		ntb->peer_msix_data[i].nmd_ofs = val;
+	}
+
+	ntb->peer_msix_done = true;
+
+msix_done:
+	ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+	ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
+	if (val != NTB_MSIX_RECEIVED)
+		goto reschedule;
+
+	ntb->peer_msix_good = true;
+
+	ntb_poll_link(ntb);
+	ntb_link_event(ntb);
+	return;
+
+reschedule:
+	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
+	if (_xeon_link_is_up(ntb))
+		callout_reset(&ntb->peer_msix_work, hz / 100, ntb_exchange_msix, ntb);
+	else
+		ntb_spad_clear(ntb);
+}
+
 /*
  * Public API to the rest of the OS
  */
@@ -2451,10 +2788,14 @@ ntb_get_max_spads(struct ntb_softc *ntb)
 uint8_t
 ntb_mw_count(struct ntb_softc *ntb)
 {
+	uint8_t res;
 
+	res = ntb->mw_count;
 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
-		return (ntb->mw_count - 1);
-	return (ntb->mw_count);
+		res--;
+	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
+		res--;
+	return (res);
 }
 
 /**
@@ -2480,6 +2821,18 @@ ntb_spad_write(struct ntb_softc *ntb, un
 	return (0);
 }
 
+/*
+ * Zeros the local scratchpad.
+ */
+void
+ntb_spad_clear(struct ntb_softc *ntb)
+{
+	unsigned i;
+
+	for (i = 0; i < ntb->spad_count; i++)
+		ntb_spad_write(ntb, i, 0);
+}
+
 /**
  * ntb_spad_read() - read from the primary scratchpad register
  * @ntb: pointer to ntb_softc instance
@@ -2808,6 +3161,22 @@ void
 ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		struct ntb_pci_bar_info *lapic;
+		unsigned i;
+
+		lapic = ntb->peer_lapic_bar;
+
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
+				bus_space_write_4(lapic->pci_bus_tag,
+				    lapic->pci_bus_handle,
+				    ntb->peer_msix_data[i].nmd_ofs,
+				    ntb->peer_msix_data[i].nmd_data);
+		}
+		return;
+	}
+
 	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
 		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
 		return;

Modified: head/sys/dev/ntb/ntb_hw/ntb_hw.h
==============================================================================
--- head/sys/dev/ntb/ntb_hw/ntb_hw.h	Sun Feb 14 22:31:38 2016	(r295617)
+++ head/sys/dev/ntb/ntb_hw/ntb_hw.h	Sun Feb 14 22:37:28 2016	(r295618)
@@ -86,6 +86,7 @@ int ntb_mw_get_wc(struct ntb_softc *, un
 int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode);
 
 uint8_t ntb_get_max_spads(struct ntb_softc *ntb);
+void ntb_spad_clear(struct ntb_softc *ntb);
 int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val);
 int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val);
 int ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx,

Modified: head/sys/dev/ntb/ntb_hw/ntb_regs.h
==============================================================================
--- head/sys/dev/ntb/ntb_hw/ntb_regs.h	Sun Feb 14 22:31:38 2016	(r295617)
+++ head/sys/dev/ntb/ntb_hw/ntb_regs.h	Sun Feb 14 22:37:28 2016	(r295618)
@@ -44,6 +44,7 @@
 #define XEON_DB_MSIX_VECTOR_COUNT	4
 #define XEON_DB_MSIX_VECTOR_SHIFT	5
 #define XEON_DB_LINK_BIT	(1 << XEON_DB_LINK)
+#define XEON_NONLINK_DB_MSIX_BITS	3
 
 #define XEON_SPCICMD_OFFSET	0x0504
 #define XEON_DEVCTRL_OFFSET	0x0598


More information about the svn-src-all mailing list