git: b941d1c64e58 - main - sym(4): Map HCB memory as uncacheable also on x86

From: Marius Strobl <marius_at_FreeBSD.org>
Date: Mon, 02 Feb 2026 22:01:52 UTC
The branch main has been updated by marius:

URL: https://cgit.FreeBSD.org/src/commit/?id=b941d1c64e58708c93621cc07ed1c8e5e709cd48

commit b941d1c64e58708c93621cc07ed1c8e5e709cd48
Author:     Marius Strobl <marius@FreeBSD.org>
AuthorDate: 2026-02-02 21:30:42 +0000
Commit:     Marius Strobl <marius@FreeBSD.org>
CommitDate: 2026-02-02 21:57:56 +0000

    sym(4): Map HCB memory as uncacheable also on x86
    
    As part of making the chip-specific mix and match of different accesses
    (DMA/bus space) work as desired, the intent is to map the HCB memory as
    uncacheable. Prior to VM_MEMATTR_*, the !x86 way of indicating this to
    bus_dmamem_alloc(9) was BUS_DMA_COHERENT. Then later on in 2db99100a4,
    BUS_DMA_NOCACHE was hooked up to VM_MEMATTR_UNCACHEABLE for x86. As it
    turns out, still as of today bus_dmamem_alloc(9) differs in this regard
    across architectures. On arm, it still supports BUS_DMA_COHERENT only
    for requesting uncacheable DMA and x86 still uses BUS_DMA_NOCACHE only.
    On arm64 and riscv, BUS_DMA_COHERENT seems to effectively be an alias
    for BUS_DMA_NOCACHE.
    
    Thus, allocate the HCB memory with BUS_DMA_COHERENT | BUS_DMA_NOCACHE,
    so we get uncacheable memory on all architectures including x86 and so
    loads and stores from/to HCB won't get reordered. However, even on x86
    we still need to use at least compiler barriers to achieve the desired
    program order.
    
    This change should also fix panics due to out-of-sync data seen with
    FreeBSD VMs on top of OpenStack and HBAs of type lsiLogic as a result
    of loads and stores getting reordered. [1]
    
    While at it:
    - Nuke the unused SYM_DRIVER_NAME macro.
    - Remove unused/redundant HCB members and correct a comment typo.
    
    PR:             270816 [1]
    MFC after:      3 days
---
 sys/dev/sym/sym_hipd.c | 46 +++++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/sys/dev/sym/sym_hipd.c b/sys/dev/sym/sym_hipd.c
index 0e51607fb07a..1399eadb21fb 100644
--- a/sys/dev/sym/sym_hipd.c
+++ b/sys/dev/sym/sym_hipd.c
@@ -58,7 +58,6 @@
  */
 
 #include <sys/cdefs.h>
-#define SYM_DRIVER_NAME	"sym-1.6.5-20000902"
 
 /* #define SYM_DEBUG_GENERIC_SUPPORT */
 
@@ -114,17 +113,15 @@ typedef	u_int32_t u32;
 #include <dev/sym/sym_fw.h>
 
 /*
- *  IA32 architecture does not reorder STORES and prevents
- *  LOADS from passing STORES. It is called `program order'
- *  by Intel and allows device drivers to deal with memory
- *  ordering by only ensuring that the code is not reordered
- *  by the compiler when ordering is required.
- *  Other architectures implement a weaker ordering that
- *  requires memory barriers (and also IO barriers when they
- *  make sense) to be used.
+ * With uncacheable memory, x86 does not reorder STORES and prevents LOADS
+ * from passing STORES.  For ensuring this program order, we still need to
+ * employ compiler barriers, though, when the ordering of LOADS and STORES
+ * matters.
+ * Other architectures may implement weaker ordering guarantees and, thus,
+ * require memory barriers (and also IO barriers) to be used.
  */
 #if	defined	__i386__ || defined __amd64__
-#define MEMORY_BARRIER()	do { ; } while(0)
+#define	MEMORY_BARRIER()	__compiler_membar()
 #elif	defined	__powerpc__
 #define MEMORY_BARRIER()	__asm__ volatile("eieio; sync" : : : "memory")
 #elif	defined	__arm__
@@ -594,10 +591,10 @@ static m_addr_t ___dma_getp(m_pool_s *mp)
 		goto out_err;
 
 	if (bus_dmamem_alloc(mp->dmat, &vaddr,
-			BUS_DMA_COHERENT | BUS_DMA_WAITOK, &vbp->dmamap))
+	    BUS_DMA_COHERENT | BUS_DMA_NOCACHE | BUS_DMA_WAITOK, &vbp->dmamap))
 		goto out_err;
-	bus_dmamap_load(mp->dmat, vbp->dmamap, vaddr,
-			MEMO_CLUSTER_SIZE, getbaddrcb, &baddr, BUS_DMA_NOWAIT);
+	bus_dmamap_load(mp->dmat, vbp->dmamap, vaddr, MEMO_CLUSTER_SIZE,
+	    getbaddrcb, &baddr, BUS_DMA_NOWAIT);
 	if (baddr) {
 		int hc = VTOB_HASH_CODE(vaddr);
 		vbp->vaddr = (m_addr_t) vaddr;
@@ -1484,7 +1481,7 @@ struct sym_hcb {
 	u32	scr_ram_seg;
 
 	/*
-	 *  Chip and controller indentification.
+	 *  Chip and controller identification.
 	 */
 	device_t device;
 
@@ -1534,7 +1531,6 @@ struct sym_hcb {
 	struct resource	*io_res;
 	struct resource	*mmio_res;
 	struct resource	*ram_res;
-	int		ram_id;
 	void *intr;
 
 	/*
@@ -1558,8 +1554,6 @@ struct sym_hcb {
 	 *  BUS addresses of the chip
 	 */
 	vm_offset_t	mmio_ba;	/* MMIO BUS address		*/
-	int		mmio_ws;	/* MMIO Window size		*/
-
 	vm_offset_t	ram_ba;		/* RAM BUS address		*/
 	int		ram_ws;		/* RAM window size		*/
 
@@ -8528,16 +8522,15 @@ sym_pci_attach(device_t dev)
 	 *  Alloc/get/map/retrieve the corresponding resources.
 	 */
 	if (np->features & (FE_RAM|FE_RAM8K)) {
-		int regs_id = SYM_PCI_RAM;
+		i = SYM_PCI_RAM;
 		if (np->features & FE_64BIT)
-			regs_id = SYM_PCI_RAM64;
-		np->ram_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
-						     &regs_id, RF_ACTIVE);
+			i = SYM_PCI_RAM64;
+		np->ram_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &i,
+		    RF_ACTIVE);
 		if (!np->ram_res) {
 			device_printf(dev,"failed to allocate RAM resources\n");
 			goto attach_failed;
 		}
-		np->ram_id  = regs_id;
 		np->ram_ba = rman_get_start(np->ram_res);
 	}
 
@@ -8772,16 +8765,15 @@ sym_pci_detach(device_t dev)
 	 */
 	if (np->ram_res)
 		bus_release_resource(np->device, SYS_RES_MEMORY,
-				     np->ram_id, np->ram_res);
+		    rman_get_rid(np->ram_res), np->ram_res);
 	if (np->mmio_res)
 		bus_release_resource(np->device, SYS_RES_MEMORY,
-				     SYM_PCI_MMIO, np->mmio_res);
+		    rman_get_rid(np->mmio_res), np->mmio_res);
 	if (np->io_res)
 		bus_release_resource(np->device, SYS_RES_IOPORT,
-				     SYM_PCI_IO, np->io_res);
+		    rman_get_rid(np->io_res), np->io_res);
 	if (np->irq_res)
-		bus_release_resource(np->device, SYS_RES_IRQ,
-				     0, np->irq_res);
+		bus_release_resource(np->device, SYS_RES_IRQ, 0, np->irq_res);
 
 	if (np->scriptb0)
 		sym_mfree_dma(np->scriptb0, np->scriptb_sz, "SCRIPTB0");