svn commit: r295747 - head/sys/dev/hyperv/netvsc

Sepherosa Ziehau <sephe@FreeBSD.org>
Thu Feb 18 07:38:00 UTC 2016


Author: sephe
Date: Thu Feb 18 07:37:59 2016
New Revision: 295747
URL: https://svnweb.freebsd.org/changeset/base/295747

Log:
  hyperv/hn: Split TX ring data structure out of softc
  
  This paves the way for the upcoming vRSS work and eases further code cleanup.
  
  Reviewed by:	adrian
  Approved by:	adrian (mentor)
  MFC after:	1 week
  Sponsored by:	Microsoft OSTC
  Differential Revision:	https://reviews.freebsd.org/D5283

Modified:
  head/sys/dev/hyperv/netvsc/hv_net_vsc.h
  head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
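
In rough outline, the per-ring TX state leaves the softc and moves into a
new struct hn_tx_ring, with the softc keeping only an array of rings plus
the few fields that stay device-global.  A simplified sketch of the
resulting shape (field names as in the diff below):

	struct hn_tx_ring {
		struct mtx		hn_tx_lock;	/* serializes transmission */
		struct mtx		hn_txlist_spin;	/* guards the free-desc list */
		struct hn_txdesc_list	hn_txlist;	/* free TX descriptors */
		struct hn_softc		*hn_sc;		/* back-pointer to the softc */
		/* ... per-ring tasks, DMA tags and statistics ... */
	} __aligned(CACHE_LINE_SIZE);

	typedef struct hn_softc {
		/* ... */
		int			hn_tx_ring_cnt;	/* 1 for now; vRSS later */
		struct hn_tx_ring	*hn_tx_ring;	/* per-ring TX state */
		int			hn_tx_chimney_max; /* stays device-global */
	} hn_softc_t;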

Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.h	Thu Feb 18 07:28:45 2016	(r295746)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h	Thu Feb 18 07:37:59 2016	(r295747)
@@ -1011,6 +1011,38 @@ struct hn_rx_ring {
 #define HN_TRUST_HCSUM_TCP	0x0002
 #define HN_TRUST_HCSUM_UDP	0x0004
 
+struct hn_tx_ring {
+	struct mtx	hn_txlist_spin;
+	struct hn_txdesc_list hn_txlist;
+	int		hn_txdesc_cnt;
+	int		hn_txdesc_avail;
+	int		hn_txeof;
+
+	int		hn_sched_tx;
+	struct taskqueue *hn_tx_taskq;
+	struct task	hn_start_task;
+	struct task	hn_txeof_task;
+
+	struct mtx	hn_tx_lock;
+	struct hn_softc	*hn_sc;
+
+	int		hn_direct_tx_size;
+	int		hn_tx_chimney_size;
+	bus_dma_tag_t	hn_tx_data_dtag;
+	uint64_t	hn_csum_assist;
+
+	u_long		hn_no_txdescs;
+	u_long		hn_send_failed;
+	u_long		hn_txdma_failed;
+	u_long		hn_tx_collapsed;
+	u_long		hn_tx_chimney;
+
+	/* Rarely used stuffs */
+	struct hn_txdesc *hn_txdesc;
+	bus_dma_tag_t	hn_tx_rndis_dtag;
+	struct sysctl_oid *hn_tx_sysctl_tree;
+} __aligned(CACHE_LINE_SIZE);
+
 /*
  * Device-specific softc structure
  */
@@ -1028,33 +1060,14 @@ typedef struct hn_softc {
 	struct hv_device  *hn_dev_obj;
 	netvsc_dev  	*net_dev;
 
-	struct hn_txdesc *hn_txdesc;
-	bus_dma_tag_t	hn_tx_data_dtag;
-	bus_dma_tag_t	hn_tx_rndis_dtag;
-	int		hn_tx_chimney_size;
-	int		hn_tx_chimney_max;
-	uint64_t	hn_csum_assist;
-
-	struct mtx	hn_txlist_spin;
-	struct hn_txdesc_list hn_txlist;
-	int		hn_txdesc_cnt;
-	int		hn_txdesc_avail;
-	int		hn_txeof;
-
-	int		hn_sched_tx;
-	int		hn_direct_tx_size;
-	struct taskqueue *hn_tx_taskq;
-	struct task	hn_start_task;
-	struct task	hn_txeof_task;
-
 	int		hn_rx_ring_cnt;
 	struct hn_rx_ring *hn_rx_ring;
 
-	u_long		hn_no_txdescs;
-	u_long		hn_send_failed;
-	u_long		hn_txdma_failed;
-	u_long		hn_tx_collapsed;
-	u_long		hn_tx_chimney;
+	int		hn_tx_ring_cnt;
+	struct hn_tx_ring *hn_tx_ring;
+	int		hn_tx_chimney_max;
+	struct taskqueue *hn_tx_taskq;
+	struct sysctl_oid *hn_tx_sysctl_tree;
 } hn_softc_t;
 
 /*

Modified: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Thu Feb 18 07:28:45 2016	(r295746)
+++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Thu Feb 18 07:37:59 2016	(r295747)
@@ -153,7 +153,7 @@ __FBSDID("$FreeBSD$");
 struct hn_txdesc {
 	SLIST_ENTRY(hn_txdesc) link;
 	struct mbuf	*m;
-	struct hn_softc	*sc;
+	struct hn_tx_ring *txr;
 	int		refs;
 	uint32_t	flags;		/* HN_TXD_FLAG_ */
 	netvsc_packet	netvsc_pkt;	/* XXX to be removed */
@@ -193,7 +193,6 @@ struct hn_txdesc {
 #define NV_LOCK_INIT(_sc, _name) \
 	    mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
 #define NV_LOCK(_sc)		mtx_lock(&(_sc)->hn_lock)
-#define NV_TRYLOCK(_sc)		mtx_trylock(&(_sc)->hn_lock)
 #define NV_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->hn_lock, MA_OWNED)
 #define NV_UNLOCK(_sc)		mtx_unlock(&(_sc)->hn_lock)
 #define NV_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->hn_lock)
@@ -266,9 +265,9 @@ static void hn_stop(hn_softc_t *sc);
 static void hn_ifinit_locked(hn_softc_t *sc);
 static void hn_ifinit(void *xsc);
 static int  hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int hn_start_locked(struct ifnet *ifp, int len);
+static int hn_start_locked(struct hn_tx_ring *txr, int len);
 static void hn_start(struct ifnet *ifp);
-static void hn_start_txeof(struct ifnet *ifp);
+static void hn_start_txeof(struct hn_tx_ring *);
 static int hn_ifmedia_upd(struct ifnet *ifp);
 static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
@@ -277,14 +276,20 @@ static int hn_trust_hcsum_sysctl(SYSCTL_
 static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_check_iplen(const struct mbuf *, int);
-static int hn_create_tx_ring(struct hn_softc *sc);
-static void hn_destroy_tx_ring(struct hn_softc *sc);
+static int hn_create_tx_ring(struct hn_softc *, int);
+static void hn_destroy_tx_ring(struct hn_tx_ring *);
+static int hn_create_tx_data(struct hn_softc *);
+static void hn_destroy_tx_data(struct hn_softc *);
 static void hn_start_taskfunc(void *xsc, int pending);
 static void hn_txeof_taskfunc(void *xsc, int pending);
-static int hn_encap(struct hn_softc *, struct hn_txdesc *, struct mbuf **);
+static void hn_stop_tx_tasks(struct hn_softc *);
+static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
 static void hn_create_rx_data(struct hn_softc *sc);
 static void hn_destroy_rx_data(struct hn_softc *sc);
+static void hn_set_tx_chimney_size(struct hn_softc *, int);
 
 static int
 hn_ifmedia_upd(struct ifnet *ifp __unused)
@@ -350,8 +355,6 @@ netvsc_attach(device_t dev)
 	hn_softc_t *sc;
 	int unit = device_get_unit(dev);
 	struct ifnet *ifp = NULL;
-	struct sysctl_oid_list *child;
-	struct sysctl_ctx_list *ctx;
 	int error;
 #if __FreeBSD_version >= 1100045
 	int tso_maxlen;
@@ -365,7 +368,6 @@ netvsc_attach(device_t dev)
 	bzero(sc, sizeof(hn_softc_t));
 	sc->hn_unit = unit;
 	sc->hn_dev = dev;
-	sc->hn_direct_tx_size = hn_direct_tx_size;
 
 	if (hn_tx_taskq == NULL) {
 		sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
@@ -375,13 +377,6 @@ netvsc_attach(device_t dev)
 	} else {
 		sc->hn_tx_taskq = hn_tx_taskq;
 	}
-	TASK_INIT(&sc->hn_start_task, 0, hn_start_taskfunc, sc);
-	TASK_INIT(&sc->hn_txeof_task, 0, hn_txeof_taskfunc, sc);
-
-	error = hn_create_tx_ring(sc);
-	if (error)
-		goto failed;
-
 	NV_LOCK_INIT(sc, "NetVSCLock");
 
 	sc->hn_dev_obj = device_ctx;
@@ -389,6 +384,10 @@ netvsc_attach(device_t dev)
 	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
 	ifp->if_softc = sc;
 
+	error = hn_create_tx_data(sc);
+	if (error)
+		goto failed;
+
 	hn_create_rx_data(sc);
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
@@ -421,12 +420,7 @@ netvsc_attach(device_t dev)
 	ifp->if_capenable |=
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
 	    IFCAP_LRO;
-
-	if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
-		sc->hn_csum_assist = HN_CSUM_ASSIST;
-	else
-		sc->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
-	ifp->if_hwassist = sc->hn_csum_assist | CSUM_TSO;
+	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO;
 
 	error = hv_rf_on_device_add(device_ctx, &device_info);
 	if (error)
@@ -455,52 +449,14 @@ netvsc_attach(device_t dev)
 #endif
 
 	sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
-	sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+	hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
 	if (hn_tx_chimney_size > 0 &&
 	    hn_tx_chimney_size < sc->hn_tx_chimney_max)
-		sc->hn_tx_chimney_size = hn_tx_chimney_size;
-
-	/*
-	 * Always schedule transmission instead of trying
-	 * to do direct transmission.  This one gives the
-	 * best performance so far.
-	 */
-	sc->hn_sched_tx = 1;
-
-	ctx = device_get_sysctl_ctx(dev);
-	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
-
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
-	    CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
-	    CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
-	    CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
-	    CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
-	    CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
-	    CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
-	    CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
-	    CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
-	    "Chimney send packet size upper boundary");
-	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
-	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
-	    "I", "Chimney send packet size limit");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "direct_tx_size",
-	    CTLFLAG_RW, &sc->hn_direct_tx_size, 0,
-	    "Size of the packet for direct transmission");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "sched_tx",
-	    CTLFLAG_RW, &sc->hn_sched_tx, 0,
-	    "Always schedule transmission "
-	    "instead of doing direct transmission");
+		hn_set_tx_chimney_size(sc, hn_tx_chimney_size);
 
 	return (0);
 failed:
-	hn_destroy_tx_ring(sc);
+	hn_destroy_tx_data(sc);
 	if (ifp != NULL)
 		if_free(ifp);
 	return (error);
@@ -531,14 +487,14 @@ netvsc_detach(device_t dev)
 
 	hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
 
-	taskqueue_drain(sc->hn_tx_taskq, &sc->hn_start_task);
-	taskqueue_drain(sc->hn_tx_taskq, &sc->hn_txeof_task);
-	if (sc->hn_tx_taskq != hn_tx_taskq)
-		taskqueue_free(sc->hn_tx_taskq);
+	hn_stop_tx_tasks(sc);
 
 	ifmedia_removeall(&sc->hn_media);
 	hn_destroy_rx_data(sc);
-	hn_destroy_tx_ring(sc);
+	hn_destroy_tx_data(sc);
+
+	if (sc->hn_tx_taskq != hn_tx_taskq)
+		taskqueue_free(sc->hn_tx_taskq);
 
 	return (0);
 }
@@ -553,13 +509,13 @@ netvsc_shutdown(device_t dev)
 }
 
 static __inline int
-hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
+hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
     struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
 {
 	struct mbuf *m = *m_head;
 	int error;
 
-	error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
+	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
 	    m, segs, nsegs, BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		struct mbuf *m_new;
@@ -569,13 +525,13 @@ hn_txdesc_dmamap_load(struct hn_softc *s
 			return ENOBUFS;
 		else
 			*m_head = m = m_new;
-		sc->hn_tx_collapsed++;
+		txr->hn_tx_collapsed++;
 
-		error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
+		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
 		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
 	}
 	if (!error) {
-		bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
+		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
 		    BUS_DMASYNC_PREWRITE);
 		txd->flags |= HN_TXD_FLAG_DMAMAP;
 	}
@@ -583,20 +539,20 @@ hn_txdesc_dmamap_load(struct hn_softc *s
 }
 
 static __inline void
-hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 
 	if (txd->flags & HN_TXD_FLAG_DMAMAP) {
-		bus_dmamap_sync(sc->hn_tx_data_dtag,
+		bus_dmamap_sync(txr->hn_tx_data_dtag,
 		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
-		bus_dmamap_unload(sc->hn_tx_data_dtag,
+		bus_dmamap_unload(txr->hn_tx_data_dtag,
 		    txd->data_dmap);
 		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
 	}
 }
 
 static __inline int
-hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 
 	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
@@ -606,7 +562,7 @@ hn_txdesc_put(struct hn_softc *sc, struc
 	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
 		return 0;
 
-	hn_txdesc_dmamap_unload(sc, txd);
+	hn_txdesc_dmamap_unload(txr, txd);
 	if (txd->m != NULL) {
 		m_freem(txd->m);
 		txd->m = NULL;
@@ -614,31 +570,31 @@ hn_txdesc_put(struct hn_softc *sc, struc
 
 	txd->flags |= HN_TXD_FLAG_ONLIST;
 
-	mtx_lock_spin(&sc->hn_txlist_spin);
-	KASSERT(sc->hn_txdesc_avail >= 0 &&
-	    sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
-	    ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
-	sc->hn_txdesc_avail++;
-	SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
-	mtx_unlock_spin(&sc->hn_txlist_spin);
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	KASSERT(txr->hn_txdesc_avail >= 0 &&
+	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
+	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
+	txr->hn_txdesc_avail++;
+	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+	mtx_unlock_spin(&txr->hn_txlist_spin);
 
 	return 1;
 }
 
 static __inline struct hn_txdesc *
-hn_txdesc_get(struct hn_softc *sc)
+hn_txdesc_get(struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *txd;
 
-	mtx_lock_spin(&sc->hn_txlist_spin);
-	txd = SLIST_FIRST(&sc->hn_txlist);
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	txd = SLIST_FIRST(&txr->hn_txlist);
 	if (txd != NULL) {
-		KASSERT(sc->hn_txdesc_avail > 0,
-		    ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
-		sc->hn_txdesc_avail--;
-		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+		KASSERT(txr->hn_txdesc_avail > 0,
+		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
+		txr->hn_txdesc_avail--;
+		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
 	}
-	mtx_unlock_spin(&sc->hn_txlist_spin);
+	mtx_unlock_spin(&txr->hn_txlist_spin);
 
 	if (txd != NULL) {
 		KASSERT(txd->m == NULL && txd->refs == 0 &&
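
hn_txdesc_get()/hn_txdesc_put() above form a refcounted free list guarded by
the per-ring hn_txlist_spin; a descriptor only returns to the list when its
last reference drops.  A condensed view of how the send path uses the pair:

	struct hn_txdesc *txd;

	txd = hn_txdesc_get(txr);	/* pops the free list, refs == 1 */
	if (txd == NULL) {
		txr->hn_no_txdescs++;	/* ring out of descriptors */
		return;			/* caller sets IFF_DRV_OACTIVE */
	}
	if (hn_encap(txr, txd, &m_head) != 0)
		return;			/* hn_encap() freed txd and the mbuf */
	/* ... hand the packet to the VMBus channel ... */
	hn_txdesc_put(txr, txd);	/* drop our ref; netvsc_xmit_completion()
					 * drops the other and recycles txd */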
@@ -670,20 +626,21 @@ netvsc_xmit_completion(void *context)
 {
 	netvsc_packet *packet = context;
 	struct hn_txdesc *txd;
-	struct hn_softc *sc;
+	struct hn_tx_ring *txr;
 
 	txd = (struct hn_txdesc *)(uintptr_t)
 	    packet->compl.send.send_completion_tid;
 
-	sc = txd->sc;
-	sc->hn_txeof = 1;
-	hn_txdesc_put(sc, txd);
+	txr = txd->txr;
+	txr->hn_txeof = 1;
+	hn_txdesc_put(txr, txd);
 }
 
 void
 netvsc_channel_rollup(struct hv_device *device_ctx)
 {
 	struct hn_softc *sc = device_get_softc(device_ctx->device);
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */
 #if defined(INET) || defined(INET6)
 	struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
 	struct lro_ctrl *lro = &rxr->hn_lro;
@@ -695,11 +652,11 @@ netvsc_channel_rollup(struct hv_device *
 	}
 #endif
 
-	if (!sc->hn_txeof)
+	if (!txr->hn_txeof)
 		return;
 
-	sc->hn_txeof = 0;
-	hn_start_txeof(sc->hn_ifp);
+	txr->hn_txeof = 0;
+	hn_start_txeof(txr);
 }
 
 /*
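
There is still only one TX ring (vRSS comes later), so every converted
caller reaches per-ring state by indexing ring 0, and the paths that are
inherently single-ring assert as much:

	/* how callers pick a ring today */
	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];	/* TODO: vRSS */

	/* single-ring-only paths (hn_start_locked, hn_start_txeof) assert it */
	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));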
@@ -707,7 +664,7 @@ netvsc_channel_rollup(struct hv_device *
  * If this function fails, then both txd and m_head0 will be freed.
  */
 static int
-hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
+hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
 {
 	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
 	int error, nsegs, i;
@@ -810,7 +767,7 @@ hn_encap(struct hn_softc *sc, struct hn_
 #endif
 		tso_info->lso_v2_xmit.tcp_header_offset = 0;
 		tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;
-	} else if (m_head->m_pkthdr.csum_flags & sc->hn_csum_assist) {
+	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
 		rndis_tcp_ip_csum_info *csum_info;
 
 		rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
@@ -837,8 +794,8 @@ hn_encap(struct hn_softc *sc, struct hn_
 	/*
 	 * Chimney send, if the packet could fit into one chimney buffer.
 	 */
-	if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
-		netvsc_dev *net_dev = sc->net_dev;
+	if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) {
+		netvsc_dev *net_dev = txr->hn_sc->net_dev;
 		uint32_t send_buf_section_idx;
 
 		send_buf_section_idx =
@@ -857,12 +814,12 @@ hn_encap(struct hn_softc *sc, struct hn_
 			packet->send_buf_section_size =
 			    packet->tot_data_buf_len;
 			packet->page_buf_count = 0;
-			sc->hn_tx_chimney++;
+			txr->hn_tx_chimney++;
 			goto done;
 		}
 	}
 
-	error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
+	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
 	if (error) {
 		int freed;
 
@@ -872,12 +829,12 @@ hn_encap(struct hn_softc *sc, struct hn_
 		m_freem(m_head);
 		*m_head0 = NULL;
 
-		freed = hn_txdesc_put(sc, txd);
+		freed = hn_txdesc_put(txr, txd);
 		KASSERT(freed != 0,
 		    ("fail to free txd upon txdma error"));
 
-		sc->hn_txdma_failed++;
-		if_inc_counter(sc->hn_ifp, IFCOUNTER_OERRORS, 1);
+		txr->hn_txdma_failed++;
+		if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
 		return error;
 	}
 	*m_head0 = m_head;
@@ -920,11 +877,15 @@ done:
  * Start a transmit of one or more packets
  */
 static int
-hn_start_locked(struct ifnet *ifp, int len)
+hn_start_locked(struct hn_tx_ring *txr, int len)
 {
-	struct hn_softc *sc = ifp->if_softc;
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
 	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
 
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return 0;
@@ -948,15 +909,15 @@ hn_start_locked(struct ifnet *ifp, int l
 			return 1;
 		}
 
-		txd = hn_txdesc_get(sc);
+		txd = hn_txdesc_get(txr);
 		if (txd == NULL) {
-			sc->hn_no_txdescs++;
+			txr->hn_no_txdescs++;
 			IF_PREPEND(&ifp->if_snd, m_head);
 			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
 			break;
 		}
 
-		error = hn_encap(sc, txd, &m_head);
+		error = hn_encap(txr, txd, &m_head);
 		if (error) {
 			/* Both txd and m_head are freed */
 			continue;
@@ -971,7 +932,7 @@ again:
 			ETHER_BPF_MTAP(ifp, m_head);
 			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		}
-		hn_txdesc_put(sc, txd);
+		hn_txdesc_put(txr, txd);
 
 		if (__predict_false(error)) {
 			int freed;
@@ -983,9 +944,9 @@ again:
 			 * commands to run?  Ask netvsc_channel_rollup()
 			 * to kick start later.
 			 */
-			sc->hn_txeof = 1;
+			txr->hn_txeof = 1;
 			if (!send_failed) {
-				sc->hn_send_failed++;
+				txr->hn_send_failed++;
 				send_failed = 1;
 				/*
 				 * Try sending again after set hn_txeof;
@@ -1002,11 +963,11 @@ again:
 			 * DMA map in hn_txdesc_put(), if it was loaded.
 			 */
 			txd->m = NULL;
-			freed = hn_txdesc_put(sc, txd);
+			freed = hn_txdesc_put(txr, txd);
 			KASSERT(freed != 0,
 			    ("fail to free txd upon send error"));
 
-			sc->hn_send_failed++;
+			txr->hn_send_failed++;
 			IF_PREPEND(&ifp->if_snd, m_head);
 			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
 			break;
@@ -1384,8 +1345,10 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 		}
 
 		sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
-		if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
-			sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+		if (sc->hn_tx_ring[0].hn_tx_chimney_size >
+		    sc->hn_tx_chimney_max)
+			hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
+
 		hn_ifinit_locked(sc);
 
 		NV_LOCK(sc);
@@ -1450,10 +1413,13 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if (mask & IFCAP_TXCSUM) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
-			if (ifp->if_capenable & IFCAP_TXCSUM)
-				ifp->if_hwassist |= sc->hn_csum_assist;
-			else
-				ifp->if_hwassist &= ~sc->hn_csum_assist;
+			if (ifp->if_capenable & IFCAP_TXCSUM) {
+				ifp->if_hwassist |=
+				    sc->hn_tx_ring[0].hn_csum_assist;
+			} else {
+				ifp->if_hwassist &=
+				    ~sc->hn_tx_ring[0].hn_csum_assist;
+			}
 		}
 
 		if (mask & IFCAP_RXCSUM)
@@ -1536,48 +1502,54 @@ static void
 hn_start(struct ifnet *ifp)
 {
 	struct hn_softc *sc = ifp->if_softc;
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
 
-	if (sc->hn_sched_tx)
+	if (txr->hn_sched_tx)
 		goto do_sched;
 
-	if (NV_TRYLOCK(sc)) {
+	if (mtx_trylock(&txr->hn_tx_lock)) {
 		int sched;
 
-		sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
-		NV_UNLOCK(sc);
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
 		if (!sched)
 			return;
 	}
 do_sched:
-	taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
+	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_start_task);
 }
 
 static void
-hn_start_txeof(struct ifnet *ifp)
+hn_start_txeof(struct hn_tx_ring *txr)
 {
-	struct hn_softc *sc = ifp->if_softc;
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
 
-	if (sc->hn_sched_tx)
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+
+	if (txr->hn_sched_tx)
 		goto do_sched;
 
-	if (NV_TRYLOCK(sc)) {
+	if (mtx_trylock(&txr->hn_tx_lock)) {
 		int sched;
 
 		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
-		sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
-		NV_UNLOCK(sc);
-		if (sched)
-			taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (sched) {
+			taskqueue_enqueue(txr->hn_tx_taskq,
+			    &txr->hn_start_task);
+		}
 	} else {
 do_sched:
 		/*
 		 * Release the OACTIVE earlier, with the hope, that
 		 * others could catch up.  The task will clear the
-		 * flag again with the NV_LOCK to avoid possible
+		 * flag again with the hn_tx_lock to avoid possible
 		 * races.
 		 */
 		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
-		taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_txeof_task);
+		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
 	}
 }
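
hn_start() and hn_start_txeof() above keep the existing trylock-or-defer
scheme, just retargeted from the softc-wide NV_LOCK to the per-ring
hn_tx_lock and per-ring tasks; condensed:

	if (txr->hn_sched_tx ||			/* always defer? */
	    !mtx_trylock(&txr->hn_tx_lock)) {
		/* lock contended or scheduling forced: punt to the task */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_start_task);
		return;
	}
	sched = hn_start_locked(txr, txr->hn_direct_tx_size);
	mtx_unlock(&txr->hn_tx_lock);
	if (sched)	/* more work left; let the task pick it up */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_start_task);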
 
@@ -1736,7 +1708,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER
 	struct hn_softc *sc = arg1;
 	int chimney_size, error;
 
-	chimney_size = sc->hn_tx_chimney_size;
+	chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size;
 	error = sysctl_handle_int(oidp, &chimney_size, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
@@ -1744,8 +1716,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER
 	if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
 		return EINVAL;
 
-	if (sc->hn_tx_chimney_size != chimney_size)
-		sc->hn_tx_chimney_size = chimney_size;
+	hn_set_tx_chimney_size(sc, chimney_size);
 	return 0;
 }
 
@@ -1802,6 +1773,56 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARG
 }
 
 static int
+hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_tx_ring *txr;
+	u_long stat;
+
+	stat = 0;
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		stat += *((u_long *)((uint8_t *)txr + ofs));
+	}
+
+	error = sysctl_handle_long(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((u_long *)((uint8_t *)txr + ofs)) = 0;
+	}
+	return 0;
+}
+
+static int
+hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error, conf;
+	struct hn_tx_ring *txr;
+
+	txr = &sc->hn_tx_ring[0];
+	conf = *((int *)((uint8_t *)txr + ofs));
+
+	error = sysctl_handle_int(oidp, &conf, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	NV_LOCK(sc);
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((int *)((uint8_t *)txr + ofs)) = conf;
+	}
+	NV_UNLOCK(sc);
+
+	return 0;
+}
+
+static int
 hn_check_iplen(const struct mbuf *m, int hoff)
 {
 	const struct ip *ip;
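
The two handlers above take a byte offset into struct hn_tx_ring via arg2:
hn_tx_stat_ulong_sysctl() sums the field across all rings on read and zeroes
it on every ring on any write, while hn_tx_conf_int_sysctl() reads ring 0
and fans a write out to every ring under NV_LOCK.  Exporting a further
per-ring counter would then need only one registration line, e.g.
(hn_tx_foo is a hypothetical field):

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_foo",
	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
	    __offsetof(struct hn_tx_ring, hn_tx_foo),
	    hn_tx_stat_ulong_sysctl, "LU", "# of hypothetical foo events");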
@@ -2019,16 +2040,37 @@ hn_destroy_rx_data(struct hn_softc *sc)
 }
 
 static int
-hn_create_tx_ring(struct hn_softc *sc)
+hn_create_tx_ring(struct hn_softc *sc, int id)
 {
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
 	bus_dma_tag_t parent_dtag;
 	int error, i;
 
-	sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
-	sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
+	txr->hn_sc = sc;
+
+	mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+	mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
+
+	txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
+	txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
 	    M_NETVSC, M_WAITOK | M_ZERO);
-	SLIST_INIT(&sc->hn_txlist);
-	mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+	SLIST_INIT(&txr->hn_txlist);
+
+	txr->hn_tx_taskq = sc->hn_tx_taskq;
+	TASK_INIT(&txr->hn_start_task, 0, hn_start_taskfunc, txr);
+	TASK_INIT(&txr->hn_txeof_task, 0, hn_txeof_taskfunc, txr);
+
+	txr->hn_direct_tx_size = hn_direct_tx_size;
+	if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
+		txr->hn_csum_assist = HN_CSUM_ASSIST;
+	else
+		txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
+
+	/*
+	 * Always schedule transmission instead of trying to do direct
+	 * transmission.  This one gives the best performance so far.
+	 */
+	txr->hn_sched_tx = 1;
 
 	parent_dtag = bus_get_dma_tag(sc->hn_dev);
 
@@ -2045,7 +2087,7 @@ hn_create_tx_ring(struct hn_softc *sc)
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
-	    &sc->hn_tx_rndis_dtag);
+	    &txr->hn_tx_rndis_dtag);
 	if (error) {
 		device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
 		return error;
@@ -2064,21 +2106,21 @@ hn_create_tx_ring(struct hn_softc *sc)
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
-	    &sc->hn_tx_data_dtag);
+	    &txr->hn_tx_data_dtag);
 	if (error) {
 		device_printf(sc->hn_dev, "failed to create data dmatag\n");
 		return error;
 	}
 
-	for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
-		struct hn_txdesc *txd = &sc->hn_txdesc[i];
+	for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
+		struct hn_txdesc *txd = &txr->hn_txdesc[i];
 
-		txd->sc = sc;
+		txd->txr = txr;
 
 		/*
 		 * Allocate and load RNDIS messages.
 		 */
-        	error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
+        	error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
 		    (void **)&txd->rndis_msg,
 		    BUS_DMA_WAITOK | BUS_DMA_COHERENT,
 		    &txd->rndis_msg_dmap);
@@ -2088,7 +2130,7 @@ hn_create_tx_ring(struct hn_softc *sc)
 			return error;
 		}
 
-		error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
+		error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg_dmap,
 		    txd->rndis_msg, HN_RNDIS_MSG_LEN,
 		    hn_dma_map_paddr, &txd->rndis_msg_paddr,
@@ -2096,80 +2138,221 @@ hn_create_tx_ring(struct hn_softc *sc)
 		if (error) {
 			device_printf(sc->hn_dev,
 			    "failed to load rndis_msg, %d\n", i);
-			bus_dmamem_free(sc->hn_tx_rndis_dtag,
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg, txd->rndis_msg_dmap);
 			return error;
 		}
 
 		/* DMA map for TX data. */
-		error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
+		error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
 		    &txd->data_dmap);
 		if (error) {
 			device_printf(sc->hn_dev,
 			    "failed to allocate tx data dmamap\n");
-			bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+			bus_dmamap_unload(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg_dmap);
-			bus_dmamem_free(sc->hn_tx_rndis_dtag,
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg, txd->rndis_msg_dmap);
 			return error;
 		}
 
 		/* All set, put it to list */
 		txd->flags |= HN_TXD_FLAG_ONLIST;
-		SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
+		SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+	}
+	txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
+
+	if (sc->hn_tx_sysctl_tree != NULL) {
+		struct sysctl_oid_list *child;
+		struct sysctl_ctx_list *ctx;
+		char name[16];
+
+		/*
+		 * Create per TX ring sysctl tree:
+		 * dev.hn.UNIT.tx.RINGID
+		 */
+		ctx = device_get_sysctl_ctx(sc->hn_dev);
+		child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
+
+		snprintf(name, sizeof(name), "%d", id);
+		txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
+		    name, CTLFLAG_RD, 0, "");
+
+		if (txr->hn_tx_sysctl_tree != NULL) {
+			child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
+
+			SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
+			    CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
+			    "# of available TX descs");
+		}
 	}
-	sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
 
 	return 0;
 }
 
 static void
-hn_destroy_tx_ring(struct hn_softc *sc)
+hn_destroy_tx_ring(struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *txd;
 
-	while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
+	if (txr->hn_txdesc == NULL)
+		return;
+
+	while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
 		KASSERT(txd->m == NULL, ("still has mbuf installed"));
 		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
 		    ("still dma mapped"));
-		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
 
-		bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+		bus_dmamap_unload(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg_dmap);
-		bus_dmamem_free(sc->hn_tx_rndis_dtag,
+		bus_dmamem_free(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg, txd->rndis_msg_dmap);
 
-		bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
+		bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
 	}
 
-	if (sc->hn_tx_data_dtag != NULL)
-		bus_dma_tag_destroy(sc->hn_tx_data_dtag);
-	if (sc->hn_tx_rndis_dtag != NULL)
-		bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
-	free(sc->hn_txdesc, M_NETVSC);
-	mtx_destroy(&sc->hn_txlist_spin);
+	if (txr->hn_tx_data_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_data_dtag);
+	if (txr->hn_tx_rndis_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
+	free(txr->hn_txdesc, M_NETVSC);
+	txr->hn_txdesc = NULL;
+
+	mtx_destroy(&txr->hn_txlist_spin);
+	mtx_destroy(&txr->hn_tx_lock);
+}
+
+static int
+hn_create_tx_data(struct hn_softc *sc)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	int i;
+
+	sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */
+	sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
+	    M_NETVSC, M_WAITOK | M_ZERO);
+
+	ctx = device_get_sysctl_ctx(sc->hn_dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
+
+	/* Create dev.hn.UNIT.tx sysctl tree */
+	sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
+	    CTLFLAG_RD, 0, "");
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		int error;
+
+		error = hn_create_tx_ring(sc, i);
+		if (error)
+			return error;
+	}
+
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_no_txdescs),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_send_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_txdma_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_collapsed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_chimney),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
+	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
+	    "# of total TX descs");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
+	    CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
+	    "Chimney send packet size upper boundary");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
+	    "I", "Chimney send packet size limit");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
+	    CTLTYPE_INT | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_direct_tx_size),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Size of the packet for direct transmission");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
+	    CTLTYPE_INT | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_sched_tx),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Always schedule transmission "
+	    "instead of doing direct transmission");
+
+	return 0;
 }
 
 static void
-hn_start_taskfunc(void *xsc, int pending __unused)
+hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
 {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
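
Only the signature of the new hn_set_tx_chimney_size() survives the
truncation above.  Judging from its callers and from the fan-out style of
hn_tx_conf_int_sysctl(), it plausibly does no more than propagate the limit
to every ring; a guess at its shape (the real body is in the truncated
remainder of the diff):

	static void
	hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
	{
		int i;

		for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
			sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size;
	}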

