svn commit: r244684 - in user/attilio/membarclean: dev/bxe dev/cxgb dev/cxgb/sys dev/cxgb/ulp/iw_cxgb dev/cxgbe dev/e1000 dev/ixgbe dev/netmap dev/sfxge/common ofed/include/linux sys

Attilio Rao attilio at FreeBSD.org
Tue Dec 25 16:36:42 UTC 2012


Author: attilio
Date: Tue Dec 25 16:36:38 2012
New Revision: 244684
URL: http://svnweb.freebsd.org/changeset/base/244684

Log:
  Generalize the prefetch concept into an MI interface.
  Apparently (given the ofed support) Linux already has a similar
  interface, but it does not take the cache level or the purpose
  (read vs. write) of the prefetch into account.
  
  Cache levels are also meaningful on x86 and will certainly matter
  even more in the embedded world, so I think it is important to be
  able to specify them.
  
  Unofficial documentation seems to say that __builtin_prefetch() was
  added in gcc 3.0, but I couldn't find any official documentation
  stating that.
  
  - Maybe we need documentation for the PRFTCH_* flags?
  - Maybe we want to retain compatibility with Linux, at the cost of
    under-optimized behaviour?
  - Why don't we make proper use of __has_builtin() checks under
    __clang__?
  
  Requested by:	alc

Modified:
  user/attilio/membarclean/dev/bxe/if_bxe.h
  user/attilio/membarclean/dev/cxgb/cxgb_osdep.h
  user/attilio/membarclean/dev/cxgb/cxgb_sge.c
  user/attilio/membarclean/dev/cxgb/sys/mvec.h
  user/attilio/membarclean/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h
  user/attilio/membarclean/dev/cxgbe/adapter.h
  user/attilio/membarclean/dev/cxgbe/t4_sge.c
  user/attilio/membarclean/dev/e1000/e1000_osdep.h
  user/attilio/membarclean/dev/ixgbe/ixgbe.c
  user/attilio/membarclean/dev/ixgbe/ixgbe_osdep.h
  user/attilio/membarclean/dev/ixgbe/ixv.c
  user/attilio/membarclean/dev/netmap/ixgbe_netmap.h
  user/attilio/membarclean/dev/netmap/netmap.c
  user/attilio/membarclean/dev/sfxge/common/efsys.h
  user/attilio/membarclean/ofed/include/linux/list.h
  user/attilio/membarclean/sys/buf_ring.h
  user/attilio/membarclean/sys/cdefs.h

Modified: user/attilio/membarclean/dev/bxe/if_bxe.h
==============================================================================
--- user/attilio/membarclean/dev/bxe/if_bxe.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/bxe/if_bxe.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -1813,12 +1813,6 @@ struct bxe_softc {
 #endif
 #endif
 
-static __inline void
-prefetch(void *x)
-{
-	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-
 #define	BXE_RX_ALIGN		(1 << BXE_RX_ALIGN_SHIFT)
 
 #define	PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & (~PAGE_MASK))

Modified: user/attilio/membarclean/dev/cxgb/cxgb_osdep.h
==============================================================================
--- user/attilio/membarclean/dev/cxgb/cxgb_osdep.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgb/cxgb_osdep.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -117,12 +117,6 @@ struct t3_mbuf_hdr {
 						*/
 #if defined(__i386__) || defined(__amd64__)  
 
-static __inline
-void prefetch(void *x) 
-{ 
-        __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-
 #define smp_mb() mb()
 
 #define L1_CACHE_BYTES 128
@@ -137,10 +131,11 @@ extern void kdb_backtrace(void);
 
 #else 
 #define smp_mb()
-#define prefetch(x)
 #define L1_CACHE_BYTES 32
 #endif
 
+#define	cxgb_prefetch(x)	prefetch(x, PRFTCH_RD, PRFTCH_L3)
+
 #define DBG_RX          (1 << 0)
 static const int debug_flags = DBG_RX;
 

Modified: user/attilio/membarclean/dev/cxgb/cxgb_sge.c
==============================================================================
--- user/attilio/membarclean/dev/cxgb/cxgb_sge.c	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgb/cxgb_sge.c	Tue Dec 25 16:36:38 2012	(r244684)
@@ -1381,7 +1381,7 @@ t3_encap(struct sge_qset *qs, struct mbu
 	txsd = &txq->sdesc[txq->pidx];
 	sgl = txq->txq_sgl;
 
-	prefetch(txd);
+	cxgb_prefetch(txd);
 	m0 = *m;
 
 	mtx_assert(&qs->lock, MA_OWNED);
@@ -2139,8 +2139,8 @@ t3_free_tx_desc(struct sge_qset *qs, int
 
 	mtx_assert(&qs->lock, MA_OWNED);
 	while (reclaimable--) {
-		prefetch(q->sdesc[(cidx + 1) & mask].m);
-		prefetch(q->sdesc[(cidx + 2) & mask].m);
+		cxgb_prefetch(q->sdesc[(cidx + 1) & mask].m);
+		cxgb_prefetch(q->sdesc[(cidx + 2) & mask].m);
 
 		if (txsd->m != NULL) {
 			if (txsd->flags & TX_SW_DESC_MAPPED) {
@@ -2700,10 +2700,10 @@ get_packet(adapter_t *adap, unsigned int
 	int ret = 0;
 
 	mask = fl->size - 1;
-	prefetch(fl->sdesc[(cidx + 1) & mask].m);
-	prefetch(fl->sdesc[(cidx + 2) & mask].m);
-	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
-	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);	
+	cxgb_prefetch(fl->sdesc[(cidx + 1) & mask].m);
+	cxgb_prefetch(fl->sdesc[(cidx + 2) & mask].m);
+	cxgb_prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
+	cxgb_prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);	
 
 	fl->credits--;
 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

Modified: user/attilio/membarclean/dev/cxgb/sys/mvec.h
==============================================================================
--- user/attilio/membarclean/dev/cxgb/sys/mvec.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgb/sys/mvec.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -58,7 +58,7 @@ m_freem_list(struct mbuf *m)
 	while (m != NULL) {
 		n = m->m_nextpkt;
 		if (n != NULL)
-			prefetch(n);
+			cxgb_prefetch(n);
 		m_freem(m);
 		m = n;
 	}	

Modified: user/attilio/membarclean/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h
==============================================================================
--- user/attilio/membarclean/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -3,7 +3,7 @@
 
 /* $FreeBSD$ */
 
-#undef prefetch
+#undef cxgb_prefetch
 #undef WARN_ON
 #undef max_t
 #undef udelay

Modified: user/attilio/membarclean/dev/cxgbe/adapter.h
==============================================================================
--- user/attilio/membarclean/dev/cxgbe/adapter.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgbe/adapter.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -57,15 +57,7 @@ MALLOC_DECLARE(M_CXGBE);
 #define CXGBE_UNIMPLEMENTED(s) \
     panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__)
 
-#if defined(__i386__) || defined(__amd64__)
-static __inline void
-prefetch(void *x)
-{
-	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-#else
-#define prefetch(x)
-#endif
+#define	cxgbe_prefetch(x)	prefetch(x, PRFTCH_RD, PRFTCH_L3)
 
 #ifndef SYSCTL_ADD_UQUAD
 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD

Modified: user/attilio/membarclean/dev/cxgbe/t4_sge.c
==============================================================================
--- user/attilio/membarclean/dev/cxgbe/t4_sge.c	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/cxgbe/t4_sge.c	Tue Dec 25 16:36:38 2012	(r244684)
@@ -1300,8 +1300,8 @@ t4_eth_tx(struct ifnet *ifp, struct sge_
 	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
 	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
 
-	prefetch(&eq->desc[eq->pidx]);
-	prefetch(&txq->sdesc[eq->pidx]);
+	cxgbe_prefetch(&eq->desc[eq->pidx]);
+	cxgbe_prefetch(&txq->sdesc[eq->pidx]);
 
 	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
 	coalescing = 0;
@@ -3373,7 +3373,7 @@ reclaim_tx_descs(struct sge_txq *txq, in
 	txmaps = &txq->txmaps;
 	txm = &txmaps->maps[txmaps->map_cidx];
 	if (maps)
-		prefetch(txm->m);
+		cxgbe_prefetch(txm->m);
 
 	eq->avail += reclaimed;
 	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
@@ -3389,7 +3389,7 @@ reclaim_tx_descs(struct sge_txq *txq, in
 		next = txm + 1;
 		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
 			next = txmaps->maps;
-		prefetch(next->m);
+		cxgbe_prefetch(next->m);
 
 		bus_dmamap_unload(txq->tx_tag, txm->map);
 		m_freem(txm->m);

Modified: user/attilio/membarclean/dev/e1000/e1000_osdep.h
==============================================================================
--- user/attilio/membarclean/dev/e1000/e1000_osdep.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/e1000/e1000_osdep.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -107,16 +107,6 @@ typedef boolean_t	bool;
 #define __le32		u32
 #define __le64		u64
 
-#if defined(__i386__) || defined(__amd64__)
-static __inline
-void prefetch(void *x)
-{
-	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-#else
-#define prefetch(x)
-#endif
-
 struct e1000_osdep
 {
 	bus_space_tag_t    mem_bus_space_tag;

Modified: user/attilio/membarclean/dev/ixgbe/ixgbe.c
==============================================================================
--- user/attilio/membarclean/dev/ixgbe/ixgbe.c	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/ixgbe/ixgbe.c	Tue Dec 25 16:36:38 2012	(r244684)
@@ -3654,7 +3654,7 @@ ixgbe_txeof(struct tx_ring *txr)
 			buf = txr->tx_buffers;
 			txd = txr->tx_base;
 		}
-		prefetch(txd);
+		ixgbe_prefetch(txd);
 	} while (__predict_true(--limit));
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
@@ -4447,7 +4447,7 @@ ixgbe_rxeof(struct ix_queue *que)
 					nextp = 0;
 			}
 			nbuf = &rxr->rx_buffers[nextp];
-			prefetch(nbuf);
+			ixgbe_prefetch(nbuf);
 		}
 		/*
 		** Rather than using the fmp/lmp global pointers

Modified: user/attilio/membarclean/dev/ixgbe/ixgbe_osdep.h
==============================================================================
--- user/attilio/membarclean/dev/ixgbe/ixgbe_osdep.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/ixgbe/ixgbe_osdep.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -136,15 +136,7 @@ typedef boolean_t	bool;
 #endif
 #endif
 
-#if defined(__i386__) || defined(__amd64__)
-static __inline
-void prefetch(void *x)
-{
-	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-#else
-#define prefetch(x)
-#endif
+#define	ixgbe_prefetch(x)	prefetch(x, PRFTCH_RD, PRFTCH_L3)
 
 /*
  * Optimized bcopy thanks to Luigi Rizzo's investigative work.  Assumes

Modified: user/attilio/membarclean/dev/ixgbe/ixv.c
==============================================================================
--- user/attilio/membarclean/dev/ixgbe/ixv.c	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/ixgbe/ixv.c	Tue Dec 25 16:36:38 2012	(r244684)
@@ -3367,7 +3367,7 @@ ixv_rxeof(struct ix_queue *que, int coun
 			if (nextp == adapter->num_rx_desc)
 				nextp = 0;
 			nbuf = &rxr->rx_buffers[nextp];
-			prefetch(nbuf);
+			ixgbe_prefetch(nbuf);
 		}
 		/*
 		** The header mbuf is ONLY used when header 

Modified: user/attilio/membarclean/dev/netmap/ixgbe_netmap.h
==============================================================================
--- user/attilio/membarclean/dev/netmap/ixgbe_netmap.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/netmap/ixgbe_netmap.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -263,9 +263,9 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u
 	 */
 	j = kring->nr_hwcur;
 	if (j != k) {	/* we have new packets to send */
-		prefetch(&ring->slot[j]);
+		ixgbe_prefetch(&ring->slot[j]);
 		l = netmap_idx_k2n(kring, j); /* NIC index */
-		prefetch(&txr->tx_buffers[l]);
+		ixgbe_prefetch(&txr->tx_buffers[l]);
 		for (n = 0; j != k; n++) {
 			/*
 			 * Collect per-slot info.
@@ -294,8 +294,8 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u
 
 			j = (j == lim) ? 0 : j + 1;
 			l = (l == lim) ? 0 : l + 1;
-			prefetch(&ring->slot[j]);
-			prefetch(&txr->tx_buffers[l]);
+			ixgbe_prefetch(&ring->slot[j]);
+			ixgbe_prefetch(&txr->tx_buffers[l]);
 
 			/*
 			 * Quick check for valid addr and len.

Modified: user/attilio/membarclean/dev/netmap/netmap.c
==============================================================================
--- user/attilio/membarclean/dev/netmap/netmap.c	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/netmap/netmap.c	Tue Dec 25 16:36:38 2012	(r244684)
@@ -158,7 +158,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge
 #include <sys/endian.h>
 #include <sys/refcount.h>
 #endif /* __FreeBSD__ */
-#define prefetch(x)	__builtin_prefetch(x)
 #endif /* !linux */
 
 static void bdg_netmap_attach(struct ifnet *ifp);
@@ -2133,7 +2132,7 @@ bdg_netmap_txsync(struct ifnet *ifp, u_i
 		int len = ft[ft_i].len = slot->len;
 		char *buf = ft[ft_i].buf = NMB(slot);
 
-		prefetch(buf);
+		prefetch(buf, PRFTCH_RD, PRFTCH_L3);
 		if (unlikely(len < 14))
 			continue;
 		if (unlikely(++ft_i == netmap_bridge))

Modified: user/attilio/membarclean/dev/sfxge/common/efsys.h
==============================================================================
--- user/attilio/membarclean/dev/sfxge/common/efsys.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/dev/sfxge/common/efsys.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -95,60 +95,19 @@ extern "C" {
 /* Memory type to use on FreeBSD */
 MALLOC_DECLARE(M_SFXGE);
 
-/* Machine dependend prefetch wrappers */
-#if defined(__i386__) || defined(__amd64__)
 static __inline void
 prefetch_read_many(void *addr)
 {
 
-	__asm__(
-	    "prefetcht0 (%0)"
-	    :
-	    : "r" (addr));
+	prefetch(addr, PRFTCH_RD, PRFTCH_L3);
 }
 
 static __inline void
 prefetch_read_once(void *addr)
 {
 
-	__asm__(
-	    "prefetchnta (%0)"
-	    :
-	    : "r" (addr));
+	prefetch(addr, PRFTCH_RD, PRFTCH_L0);
 }
-#elif defined(__sparc64__)
-static __inline void
-prefetch_read_many(void *addr)
-{
-
-	__asm__(
-	    "prefetch [%0], 0"
-	    :
-	    : "r" (addr));
-}
-
-static __inline void
-prefetch_read_once(void *addr)
-{
-
-	__asm__(
-	    "prefetch [%0], 1"
-	    :
-	    : "r" (addr));
-}
-#else
-static __inline void
-prefetch_read_many(void *addr)
-{
-
-}
-
-static __inline void
-prefetch_read_once(void *addr)
-{
-
-}
-#endif
 
 #if defined(__i386__) || defined(__amd64__)
 #include <vm/vm.h>

Modified: user/attilio/membarclean/ofed/include/linux/list.h
==============================================================================
--- user/attilio/membarclean/ofed/include/linux/list.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/ofed/include/linux/list.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -61,6 +61,9 @@
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 
+#ifdef prefetch
+#undef	prefetch
+#endif
 #define	prefetch(x)
 
 struct list_head {

Modified: user/attilio/membarclean/sys/buf_ring.h
==============================================================================
--- user/attilio/membarclean/sys/buf_ring.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/sys/buf_ring.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -193,9 +193,10 @@ buf_ring_dequeue_sc(struct buf_ring *br)
 
 #ifdef PREFETCH_DEFINED	
 	if (cons_next != prod_tail) {		
-		prefetch(br->br_ring[cons_next]);
+		prefetch(br->br_ring[cons_next], PRFTCH_RD, PRFTCH_L3);
 		if (cons_next_next != prod_tail) 
-			prefetch(br->br_ring[cons_next_next]);
+			prefetch(br->br_ring[cons_next_next], PRFTCH_RD,
+			    PRFTCH_L3);
 	}
 #endif
 	br->br_cons_head = cons_next;

Modified: user/attilio/membarclean/sys/cdefs.h
==============================================================================
--- user/attilio/membarclean/sys/cdefs.h	Tue Dec 25 14:29:38 2012	(r244683)
+++ user/attilio/membarclean/sys/cdefs.h	Tue Dec 25 16:36:38 2012	(r244684)
@@ -229,6 +229,20 @@
 #define	__alignof(x)	__offsetof(struct { char __a; x __b; }, __b)
 #endif
 
+#if defined(__GNUC__) && 						\
+    ((defined(__clang__) && __has_builtin(__builtin_prefetch)) ||	\
+    __GNUC_PREREQ__(3, 0))
+#define	PRFTCH_RD	0
+#define	PRFTCH_WR	1
+
+#define	PRFTCH_L0	0
+#define	PRFTCH_L1	1
+#define	PRFTCH_L2	2
+#define	PRFTCH_L3	3
+
+#define	prefetch(x, y, z)	__builtin_prefetch(x, y, z)
+#endif
+
 /*
  * Keywords added in C11.
  */


More information about the svn-src-user mailing list