svn commit: r296379 - in head/sys/dev/hyperv: include netvsc vmbus

Sepherosa Ziehau sephe at FreeBSD.org
Fri Mar 4 06:52:13 UTC 2016


Author: sephe
Date: Fri Mar  4 06:52:11 2016
New Revision: 296379
URL: https://svnweb.freebsd.org/changeset/base/296379

Log:
  hyperv/hn: Add multiple channel support, a.k.a. vRSS
  
  Each channel contains one RX ring and one TX ring, and we
  try to distribute the channels evenly across different CPUs.
  
  Note: Currently we don't have enough information to extract
  the RSS type and RSS hash value from the received packets.
  
  This greatly improves the TX/RX performance for 8 virtual CPU
  Hyper-V over 10Ge: it can max out 10Ge for TCP when multiple
  RX/TX rings are enabled.
  
  This almost doubles the TX/RX performance for locally connected
  Hyper-Vs: was 6Gbps w/ 128 TCP streams, now 11Gbps w/ multiple
  RX/TX rings enabled.
  
  It is not enabled by default; it will be switched on after more
  tests.
  
  Collaborated with:	Hongjiang Zhang <honzhan microsoft com>
  MFC after:	2 weeks
  Sponsored by:	Microsoft OSTC

Modified:
  head/sys/dev/hyperv/include/hyperv.h
  head/sys/dev/hyperv/netvsc/hv_net_vsc.c
  head/sys/dev/hyperv/netvsc/hv_net_vsc.h
  head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
  head/sys/dev/hyperv/netvsc/hv_rndis.h
  head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
  head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
  head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c

Modified: head/sys/dev/hyperv/include/hyperv.h
==============================================================================
--- head/sys/dev/hyperv/include/hyperv.h	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/include/hyperv.h	Fri Mar  4 06:52:11 2016	(r296379)
@@ -911,6 +911,8 @@ int		hv_vmbus_channel_teardown_gpdal(
 
 struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
 
+void		vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
+
 /**
  * @brief Get physical address from virtual
  */

Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.c	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c	Fri Mar  4 06:52:11 2016	(r296379)
@@ -57,7 +57,7 @@ MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper
 /*
  * Forward declarations
  */
-static void hv_nv_on_channel_callback(void *context);
+static void hv_nv_on_channel_callback(void *xchan);
 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
@@ -662,6 +662,34 @@ hv_nv_disconnect_from_vsp(netvsc_dev *ne
 }
 
 /*
+ * Callback handler for subchannel offer
+ * @param xchan new subchannel
+ */
+static void
+hv_nv_subchan_callback(void *xchan)
+{
+	struct hv_vmbus_channel *chan = xchan;
+	netvsc_dev *net_dev;
+	uint16_t chn_index = chan->offer_msg.offer.sub_channel_index;
+	struct hv_device *device = chan->device;
+	hn_softc_t *sc = device_get_softc(device->device);
+	int ret;
+
+	net_dev = sc->net_dev;
+
+	if (chn_index >= net_dev->num_channel) {
+		/* Would this ever happen? */
+		return;
+	}
+	netvsc_subchan_callback(sc, chan);
+
+	chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
+	ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE,
+	    NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0,
+	    hv_nv_on_channel_callback, chan);
+}
+
+/*
  * Net VSC on device add
  * 
  * Callback when the device belonging to this driver is added
@@ -693,6 +721,7 @@ hv_nv_on_device_add(struct hv_device *de
 		free(chan->hv_chan_rdbuf, M_NETVSC);
 		goto cleanup;
 	}
+	chan->sc_creation_callback = hv_nv_subchan_callback;
 
 	/*
 	 * Connect with the NetVsp
@@ -770,7 +799,9 @@ hv_nv_on_send_completion(netvsc_dev *net
 		|| nvsp_msg_pkt->hdr.msg_type
 			== nvsp_msg_1_type_send_rx_buf_complete
 		|| nvsp_msg_pkt->hdr.msg_type
-			== nvsp_msg_1_type_send_send_buf_complete) {
+			== nvsp_msg_1_type_send_send_buf_complete
+		|| nvsp_msg_pkt->hdr.msg_type
+			== nvsp_msg5_type_subchannel) {
 		/* Copy the response back */
 		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
 		    sizeof(nvsp_msg));
@@ -964,6 +995,46 @@ retry_send_cmplt:
 }
 
 /*
+ * Net VSC receiving vRSS send table from VSP
+ */
+static void
+hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+	netvsc_dev *net_dev;
+	nvsp_msg *nvsp_msg_pkt;
+	int i;
+	uint32_t count, *table;
+
+	net_dev = hv_nv_get_inbound_net_device(device);
+	if (!net_dev)
+        	return;
+
+	nvsp_msg_pkt =
+	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+	if (nvsp_msg_pkt->hdr.msg_type !=
+	    nvsp_msg5_type_send_indirection_table) {
+		printf("Netvsc: !Warning! receive msg type not "
+			"send_indirection_table. type = %d\n",
+			nvsp_msg_pkt->hdr.msg_type);
+		return;
+	}
+
+	count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count;
+	if (count != VRSS_SEND_TABLE_SIZE) {
+        	printf("Netvsc: Received wrong send table size: %u\n", count);
+	        return;
+	}
+
+	table = (uint32_t *)
+	    ((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table +
+	     nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset);
+
+	for (i = 0; i < count; i++)
+        	net_dev->vrss_send_table[i] = table[i];
+}
+
+/*
  * Net VSC on channel callback
  */
 static void
@@ -999,6 +1070,9 @@ hv_nv_on_channel_callback(void *xchan)
 				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
 					hv_nv_on_receive(net_dev, device, chan, desc);
 					break;
+				case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND:
+					hv_nv_send_table(device, desc);
+					break;
 				default:
 					device_printf(dev,
 					    "hv_cb recv unknow type %d "

Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.h	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h	Fri Mar  4 06:52:11 2016	(r296379)
@@ -86,6 +86,92 @@ MALLOC_DECLARE(M_NETVSC);
  */
 #define NVSP_MAX_PACKETS_PER_RECEIVE            375
 
+/* vRSS stuff */
+#define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES      0x88
+#define RNDIS_OBJECT_TYPE_RSS_PARAMETERS        0x89
+
+#define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2     2
+#define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2       2
+
+struct rndis_obj_header {
+        uint8_t type;
+        uint8_t rev;
+        uint16_t size;
+} __packed;
+
+/* rndis_recv_scale_cap/cap_flag */
+#define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS      0x01000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR            0x02000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC            0x04000000
+#define RNDIS_RSS_CAPS_USING_MSI_X                      0x08000000
+#define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS           0x10000000
+#define RNDIS_RSS_CAPS_SUPPORTS_MSI_X                   0x20000000
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4               0x00000100
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6               0x00000200
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX            0x00000400
+
+/* RNDIS_RECEIVE_SCALE_CAPABILITIES */
+struct rndis_recv_scale_cap {
+        struct rndis_obj_header hdr;
+        uint32_t cap_flag;
+        uint32_t num_int_msg;
+        uint32_t num_recv_que;
+        uint16_t num_indirect_tabent;
+} __packed;
+
+/* rndis_recv_scale_param flags */
+#define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED         0x0001
+#define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED        0x0002
+#define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED           0x0004
+#define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED         0x0008
+#define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS                0x0010
+
+/* Hash info bits */
+#define RNDIS_HASH_FUNC_TOEPLITZ                0x00000001
+#define RNDIS_HASH_IPV4                         0x00000100
+#define RNDIS_HASH_TCP_IPV4                     0x00000200
+#define RNDIS_HASH_IPV6                         0x00000400
+#define RNDIS_HASH_IPV6_EX                      0x00000800
+#define RNDIS_HASH_TCP_IPV6                     0x00001000
+#define RNDIS_HASH_TCP_IPV6_EX                  0x00002000
+
+#define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
+
+#define ITAB_NUM                                        128
+#define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+
+/* RNDIS_RECEIVE_SCALE_PARAMETERS */
+typedef struct rndis_recv_scale_param_ {
+        struct rndis_obj_header hdr;
+
+        /* Qualifies the rest of the information */
+        uint16_t flag;
+
+        /* The base CPU number to do receive processing. not used */
+        uint16_t base_cpu_number;
+
+        /* This describes the hash function and type being enabled */
+        uint32_t hashinfo;
+
+        /* The size of indirection table array */
+        uint16_t indirect_tabsize;
+
+        /* The offset of the indirection table from the beginning of this
+         * structure
+         */
+        uint32_t indirect_taboffset;
+
+        /* The size of the hash secret key */
+        uint16_t hashkey_size;
+
+        /* The offset of the secret key from the beginning of this structure */
+        uint32_t hashkey_offset;
+
+        uint32_t processor_masks_offset;
+        uint32_t num_processor_masks;
+        uint32_t processor_masks_entry_size;
+} rndis_recv_scale_param;
 
 typedef enum nvsp_msg_type_ {
 	nvsp_msg_type_none                      = 0,
@@ -146,6 +232,27 @@ typedef enum nvsp_msg_type_ {
 
 	nvsp_msg_2_type_alloc_chimney_handle,
 	nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+	nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+	/*
+	 * Version 4 Messages
+	 */
+	nvsp_msg4_type_send_vf_association,
+	nvsp_msg4_type_switch_data_path,
+	nvsp_msg4_type_uplink_connect_state_deprecated,
+
+	nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated,
+
+	/*
+	 * Version 5 Messages
+	 */
+	nvsp_msg5_type_oid_query_ex,
+	nvsp_msg5_type_oid_query_ex_comp,
+	nvsp_msg5_type_subchannel,
+	nvsp_msg5_type_send_indirection_table,
+
+	nvsp_msg5_max = nvsp_msg5_type_send_indirection_table,
 } nvsp_msg_type;
 
 typedef enum nvsp_status_ {
@@ -793,6 +900,39 @@ typedef struct nvsp_2_msg_send_vmq_rndis
 	uint32_t                                status;
 } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
 
+/*
+ * Version 5 messages
+ */
+enum nvsp_subchannel_operation {
+        NVSP_SUBCHANNEL_NONE = 0,
+        NVSP_SUBCHANNE_ALLOCATE,
+        NVSP_SUBCHANNE_MAX
+};
+
+typedef struct nvsp_5_subchannel_request_
+{
+        uint32_t                                op;
+        uint32_t                                num_subchannels;
+} __packed nvsp_5_subchannel_request;
+
+typedef struct nvsp_5_subchannel_complete_
+{
+        uint32_t                                status;
+        /* Actual number of subchannels allocated */
+        uint32_t                                num_subchannels;
+} __packed nvsp_5_subchannel_complete;
+
+typedef struct nvsp_5_send_indirect_table_
+{
+        /* The number of entries in the send indirection table */
+        uint32_t                                count;
+        /*
+         * The offset of the send indirection table from top of
+         * this struct. The send indirection table tells which channel
+         * to put the send traffic on. Each entry is a channel number.
+         */
+        uint32_t                                offset;
+} __packed nvsp_5_send_indirect_table;
 
 typedef union nvsp_1_msg_uber_ {
 	nvsp_1_msg_send_ndis_version            send_ndis_vers;
@@ -838,11 +978,18 @@ typedef union nvsp_2_msg_uber_ {
 	nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
 } __packed nvsp_2_msg_uber;
 
+typedef union nvsp_5_msg_uber_
+{
+        nvsp_5_subchannel_request               subchannel_request;
+        nvsp_5_subchannel_complete              subchn_complete;
+        nvsp_5_send_indirect_table              send_table;
+} __packed nvsp_5_msg_uber;
 
 typedef union nvsp_all_msgs_ {
 	nvsp_msg_init_uber                      init_msgs;
 	nvsp_1_msg_uber                         vers_1_msgs;
 	nvsp_2_msg_uber                         vers_2_msgs;
+	nvsp_5_msg_uber				vers_5_msgs;
 } __packed nvsp_all_msgs;
 
 /*
@@ -883,6 +1030,7 @@ typedef struct nvsp_msg_ {
 #define NETVSC_MAX_CONFIGURABLE_MTU		(9 * 1024)
 
 #define NETVSC_PACKET_SIZE			PAGE_SIZE
+#define VRSS_SEND_TABLE_SIZE			16
 
 /*
  * Data types
@@ -923,6 +1071,10 @@ typedef struct netvsc_dev_ {
 	hv_bool_uint8_t				destroy;
 	/* Negotiated NVSP version */
 	uint32_t				nvsp_version;
+
+	uint32_t                                num_channel;
+
+	uint32_t                                vrss_send_table[VRSS_SEND_TABLE_SIZE];
 } netvsc_dev;
 
 
@@ -1010,6 +1162,10 @@ struct hn_rx_ring {
 	u_long		hn_csum_trusted;
 	u_long		hn_lro_tried;
 	u_long		hn_small_pkts;
+	u_long		hn_pkts;
+
+	/* Rarely used stuff */
+	struct sysctl_oid *hn_rx_sysctl_tree;
 } __aligned(CACHE_LINE_SIZE);
 
 #define HN_TRUST_HCSUM_IP	0x0001
@@ -1084,9 +1240,12 @@ typedef struct hn_softc {
 	int		hn_tx_ring_cnt;
 	int		hn_tx_ring_inuse;
 	struct hn_tx_ring *hn_tx_ring;
+
+	int		hn_cpu;
 	int		hn_tx_chimney_max;
 	struct taskqueue *hn_tx_taskq;
 	struct sysctl_oid *hn_tx_sysctl_tree;
+	struct sysctl_oid *hn_rx_sysctl_tree;
 } hn_softc_t;
 
 /*

Modified: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Fri Mar  4 06:52:11 2016	(r296379)
@@ -287,6 +287,8 @@ static int hn_single_tx_ring = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, single_tx_ring, CTLFLAG_RDTUN,
     &hn_single_tx_ring, 0, "Use one TX ring");
 
+static u_int hn_cpu_index;
+
 /*
  * Forward declarations
  */
@@ -438,6 +440,7 @@ netvsc_attach(device_t dev)
 	ring_cnt = hn_ring_cnt;
 	if (ring_cnt <= 0 || ring_cnt >= mp_ncpus)
 		ring_cnt = mp_ncpus;
+	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
 
 	tx_ring_cnt = ring_cnt;
 	if (hn_single_tx_ring || hn_use_if_start) {
@@ -461,6 +464,7 @@ netvsc_attach(device_t dev)
 	chan->hv_chan_rxr = &sc->hn_rx_ring[0];
 	chan->hv_chan_txr = &sc->hn_tx_ring[0];
 	sc->hn_tx_ring[0].hn_chan = chan;
+	vmbus_channel_cpu_set(chan, sc->hn_cpu);
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_dunit = unit;
@@ -502,10 +506,17 @@ netvsc_attach(device_t dev)
 	error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt);
 	if (error)
 		goto failed;
+	KASSERT(sc->net_dev->num_channel <= ring_cnt,
+	    ("invalid channel count %u, should be less than %d",
+	     sc->net_dev->num_channel, ring_cnt));
 
-	/* TODO: vRSS */
-	sc->hn_tx_ring_inuse = 1;
-	sc->hn_rx_ring_inuse = 1;
+	/*
+	 * Set # of TX/RX rings that could be used according to
+	 * the # of channels that host offered.
+	 */
+	if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel)
+		sc->hn_tx_ring_inuse = sc->net_dev->num_channel;
+	sc->hn_rx_ring_inuse = sc->net_dev->num_channel;
 	device_printf(dev, "%d TX ring, %d RX ring\n",
 	    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
 
@@ -1337,6 +1348,7 @@ skip:
 	 */
 
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	rxr->hn_pkts++;
 
 	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
 #if defined(INET) || defined(INET6)
@@ -2074,6 +2086,13 @@ hn_create_rx_data(struct hn_softc *sc, i
 #endif
 #endif	/* INET || INET6 */
 
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	/* Create dev.hn.UNIT.rx sysctl tree */
+	sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
+	    CTLFLAG_RD, 0, "");
+
 	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
 		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
 
@@ -2101,10 +2120,27 @@ hn_create_rx_data(struct hn_softc *sc, i
 		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
 #endif
 #endif	/* INET || INET6 */
-	}
 
-	ctx = device_get_sysctl_ctx(dev);
-	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+		if (sc->hn_rx_sysctl_tree != NULL) {
+			char name[16];
+
+			/*
+			 * Create per RX ring sysctl tree:
+			 * dev.hn.UNIT.rx.RINGID
+			 */
+			snprintf(name, sizeof(name), "%d", i);
+			rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
+			    SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
+			    OID_AUTO, name, CTLFLAG_RD, 0, "");
+
+			if (rxr->hn_rx_sysctl_tree != NULL) {
+				SYSCTL_ADD_ULONG(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "packets", CTLFLAG_RW,
+				    &rxr->hn_pkts, "# of packets received");
+			}
+		}
+	}
 
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
 	    CTLTYPE_U64 | CTLFLAG_RW, sc,
@@ -2724,6 +2760,32 @@ hn_xmit_txeof_taskfunc(void *xtxr, int p
 	mtx_unlock(&txr->hn_tx_lock);
 }
 
+void
+netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+	int idx;
+
+	KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan),
+	    ("subchannel callback on primary channel"));
+
+	idx = chan->offer_msg.offer.sub_channel_index;
+	KASSERT(idx > 0 && idx < sc->hn_rx_ring_inuse,
+	    ("invalid channel index %d, should > 0 && < %d",
+	     idx, sc->hn_rx_ring_inuse));
+	vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
+
+	chan->hv_chan_rxr = &sc->hn_rx_ring[idx];
+	if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n",
+	    idx, chan->offer_msg.child_rel_id);
+
+	if (idx < sc->hn_tx_ring_inuse) {
+		chan->hv_chan_txr = &sc->hn_tx_ring[idx];
+		sc->hn_tx_ring[idx].hn_chan = chan;
+		if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n",
+		    idx, chan->offer_msg.child_rel_id);
+	}
+}
+
 static void
 hn_tx_taskq_create(void *arg __unused)
 {

Modified: head/sys/dev/hyperv/netvsc/hv_rndis.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis.h	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis.h	Fri Mar  4 06:52:11 2016	(r296379)
@@ -167,6 +167,14 @@
 #define RNDIS_OID_GEN_MACHINE_NAME                      0x0001021A
 #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER            0x0001021B
 
+/*
+ * For receive side scale
+ */
+/* Query only */
+#define RNDIS_OID_GEN_RSS_CAPABILITIES			0x00010203
+/* Query and set */
+#define RNDIS_OID_GEN_RSS_PARAMETERS			0x00010204
+
 #define RNDIS_OID_GEN_XMIT_OK                           0x00020101
 #define RNDIS_OID_GEN_RCV_OK                            0x00020102
 #define RNDIS_OID_GEN_XMIT_ERROR                        0x00020103
@@ -1060,6 +1068,8 @@ struct hv_vmbus_channel;
 int netvsc_recv(struct hv_vmbus_channel *chan,
     netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info);
 void netvsc_channel_rollup(struct hv_vmbus_channel *chan);
+void netvsc_subchan_callback(struct hn_softc *sc,
+    struct hv_vmbus_channel *chan);
 
 void* hv_set_rppi_data(rndis_msg *rndis_mesg,
     uint32_t rppi_size,

Modified: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c	Fri Mar  4 06:52:11 2016	(r296379)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 
 #include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
 #include "hv_net_vsc.h"
 #include "hv_rndis.h"
 #include "hv_rndis_filter.h"
@@ -223,6 +224,8 @@ hv_rf_send_request(rndis_device *device,
 {
 	int ret;
 	netvsc_packet *packet;
+	netvsc_dev      *net_dev = device->net_dev;
+	int send_buf_section_idx;
 
 	/* Set up the packet to send it */
 	packet = &request->pkt;
@@ -237,6 +240,20 @@ hv_rf_send_request(rndis_device *device,
 	packet->page_buffers[0].offset =
 	    (unsigned long)&request->request_msg & (PAGE_SIZE - 1);
 
+	if (packet->page_buffers[0].offset +
+		packet->page_buffers[0].length > PAGE_SIZE) {
+		packet->page_buf_count = 2;
+		packet->page_buffers[0].length =
+		        PAGE_SIZE - packet->page_buffers[0].offset;
+		packet->page_buffers[1].pfn =
+		        hv_get_phys_addr((char*)&request->request_msg +
+                		packet->page_buffers[0].length) >> PAGE_SHIFT;
+		packet->page_buffers[1].offset = 0;
+		packet->page_buffers[1].length =
+		        request->request_msg.msg_len -
+			        packet->page_buffers[0].length;
+	}
+
 	packet->compl.send.send_completion_context = request; /* packet */
 	if (message_type != REMOTE_NDIS_HALT_MSG) {
 		packet->compl.send.on_send_completion =
@@ -246,10 +263,25 @@ hv_rf_send_request(rndis_device *device,
 		    hv_rf_on_send_request_halt_completion;
 	}
 	packet->compl.send.send_completion_tid = (unsigned long)device;
-	packet->send_buf_section_idx =
-	    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
+	if (packet->tot_data_buf_len < net_dev->send_section_size) {
+		send_buf_section_idx = hv_nv_get_next_send_section(net_dev);
+		if (send_buf_section_idx !=
+			NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
+			char *dest = ((char *)net_dev->send_buf +
+				send_buf_section_idx * net_dev->send_section_size);
+
+			memcpy(dest, &request->request_msg, request->request_msg.msg_len);
+			packet->send_buf_section_idx = send_buf_section_idx;
+			packet->send_buf_section_size = packet->tot_data_buf_len;
+			packet->page_buf_count = 0;
+			goto sendit;
+		}
+		/* Failed to allocate chimney send buffer; move on */
+	}
+	packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
 	packet->send_buf_section_size = 0;
 
+sendit:
 	ret = hv_nv_on_send(device->net_dev->dev->channel, packet);
 
 	return (ret);
@@ -527,6 +559,19 @@ hv_rf_query_device(rndis_device *device,
 	query->info_buffer_length = 0;
 	query->device_vc_handle = 0;
 
+	if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) {
+		struct rndis_recv_scale_cap *cap;
+
+		request->request_msg.msg_len += 
+			sizeof(struct rndis_recv_scale_cap);
+		query->info_buffer_length = sizeof(struct rndis_recv_scale_cap);
+		cap = (struct rndis_recv_scale_cap *)((unsigned long)query + 
+						query->info_buffer_offset);
+		cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+		cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+		cap->hdr.size = sizeof(struct rndis_recv_scale_cap);
+	}
+
 	ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
 	if (ret != 0) {
 		/* Fixme:  printf added */
@@ -581,6 +626,114 @@ hv_rf_query_device_link_status(rndis_dev
 	    RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size));
 }
 
+static uint8_t netvsc_hash_key[HASH_KEYLEN] = {
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+/*
+ * RNDIS set vRSS parameters
+ */
+static int
+hv_rf_set_rss_param(rndis_device *device, int num_queue)
+{
+	rndis_request *request;
+	rndis_set_request *set;
+	rndis_set_complete *set_complete;
+	rndis_recv_scale_param *rssp;
+	uint32_t extlen = sizeof(rndis_recv_scale_param) +
+	    (4 * ITAB_NUM) + HASH_KEYLEN;
+	uint32_t *itab, status;
+	uint8_t *keyp;
+	int i, ret;
+
+
+	request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
+	    RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen);
+	if (request == NULL) {
+		if (bootverbose)
+			printf("Netvsc: No memory to set vRSS parameters.\n");
+		ret = -1;
+		goto cleanup;
+	}
+
+	set = &request->request_msg.msg.set_request;
+	set->oid = RNDIS_OID_GEN_RSS_PARAMETERS;
+	set->info_buffer_length = extlen;
+	set->info_buffer_offset = sizeof(rndis_set_request);
+	set->device_vc_handle = 0;
+
+	/* Fill out the rssp parameter structure */
+	rssp = (rndis_recv_scale_param *)(set + 1);
+	rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS;
+	rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+	rssp->hdr.size = sizeof(rndis_recv_scale_param);
+	rssp->flag = 0;
+	rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 |
+	    RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6;
+	rssp->indirect_tabsize = 4 * ITAB_NUM;
+	rssp->indirect_taboffset = sizeof(rndis_recv_scale_param);
+	rssp->hashkey_size = HASH_KEYLEN;
+	rssp->hashkey_offset = rssp->indirect_taboffset +
+	    rssp->indirect_tabsize;
+
+	/* Set indirection table entries */
+	itab = (uint32_t *)(rssp + 1);
+	for (i = 0; i < ITAB_NUM; i++)
+		itab[i] = i % num_queue;
+
+	/* Set hash key values */
+	keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset);
+	for (i = 0; i < HASH_KEYLEN; i++)
+		keyp[i] = netvsc_hash_key[i];
+
+	ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/*
+	 * Wait for the response from the host.  Another thread will signal
+	 * us when the response has arrived.  In the failure case,
+	 * sema_timedwait() returns a non-zero status after waiting 5 seconds.
+	 */
+	ret = sema_timedwait(&request->wait_sema, 5 * hz);
+	if (ret == 0) {
+		/* Response received, check status */
+		set_complete = &request->response_msg.msg.set_complete;
+		status = set_complete->status;
+		if (status != RNDIS_STATUS_SUCCESS) {
+			/* Bad response status, return error */
+			if (bootverbose)
+				printf("Netvsc: Failed to set vRSS "
+				    "parameters.\n");
+			ret = -2;
+		} else {
+			if (bootverbose)
+				printf("Netvsc: Successfully set vRSS "
+				    "parameters.\n");
+		}
+	} else {
+		/*
+		 * We cannot deallocate the request since we may still
+		 * receive a send completion for it.
+		 */
+		printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n",
+		    request->request_msg.msg.init_request.request_id, ret);
+		goto exit;
+	}
+
+cleanup:
+	if (request != NULL) {
+		hv_put_rndis_request(device, request);
+	}
+exit:
+	return (ret);
+}
+
 /*
  * RNDIS filter set packet filter
  * Sends an rndis request with the new filter, then waits for a response
@@ -816,12 +969,15 @@ hv_rf_close_device(rndis_device *device)
  */
 int
 hv_rf_on_device_add(struct hv_device *device, void *additl_info,
-    int nchan __unused)
+    int nchan)
 {
 	int ret;
 	netvsc_dev *net_dev;
 	rndis_device *rndis_dev;
+	nvsp_msg *init_pkt;
 	rndis_offload_params offloads;
+	struct rndis_recv_scale_cap rsscaps;
+	uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap);
 	netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;
 	device_t dev = device->device;
 
@@ -887,6 +1043,67 @@ hv_rf_on_device_add(struct hv_device *de
 	
 	dev_info->link_state = rndis_dev->link_status;
 
+	net_dev->num_channel = 1;
+	if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1)
+		return (0);
+
+	memset(&rsscaps, 0, rsscaps_size);
+	ret = hv_rf_query_device(rndis_dev,
+			RNDIS_OID_GEN_RSS_CAPABILITIES,
+			&rsscaps, &rsscaps_size);
+	if ((ret != 0) || (rsscaps.num_recv_que < 2)) {
+		device_printf(dev, "hv_rf_query_device failed or "
+			"rsscaps.num_recv_que < 2 \n");
+		goto out;
+	}
+	device_printf(dev, "channel, offered %u, requested %d\n",
+	    rsscaps.num_recv_que, nchan);
+	if (nchan > rsscaps.num_recv_que)
+		nchan = rsscaps.num_recv_que;
+	net_dev->num_channel = nchan;
+
+	if (net_dev->num_channel == 1) {
+		device_printf(dev, "net_dev->num_channel == 1 under VRSS\n");
+		goto out;
+	}
+	
+	/* request host to create sub channels */
+	init_pkt = &net_dev->channel_init_packet;
+	memset(init_pkt, 0, sizeof(nvsp_msg));
+
+	init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel;
+	init_pkt->msgs.vers_5_msgs.subchannel_request.op =
+	    NVSP_SUBCHANNE_ALLOCATE;
+	init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels =
+	    net_dev->num_channel - 1;
+
+	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		device_printf(dev, "Fail to allocate subchannel\n");
+		goto out;
+	}
+
+	sema_wait(&net_dev->channel_init_sema);
+
+	if (init_pkt->msgs.vers_5_msgs.subchn_complete.status !=
+	    nvsp_status_success) {
+		ret = ENODEV;
+		device_printf(dev, "sub channel complete error\n");
+		goto out;
+	}
+
+	net_dev->num_channel = 1 +
+	    init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels;
+
+	ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel);
+
+out:
+	if (ret)
+		net_dev->num_channel = 1;
+
 	return (ret);
 }
 

Modified: head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.h	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.h	Fri Mar  4 06:52:11 2016	(r296379)
@@ -63,17 +63,32 @@ typedef struct rndis_request_ {
 	struct sema			wait_sema;	
 
 	/*
-	 * Fixme:  We assumed a fixed size response here.  If we do ever
-	 * need to handle a bigger response, we can either define a max
-	 * response message or add a response buffer variable above this field
+	 * The max response size is sizeof(rndis_msg) + PAGE_SIZE.
+	 *
+	 * XXX
+	 * This is ugly and should be cleaned up once we busdma-fy
+	 * RNDIS request bits.
 	 */
 	rndis_msg			response_msg;
+	uint8_t				buf_resp[PAGE_SIZE];
 
 	/* Simplify allocation by having a netvsc packet inline */
 	netvsc_packet			pkt;
 	hv_vmbus_page_buffer		buffer;
-	/* Fixme:  We assumed a fixed size request here. */
+
+	/*
+	 * The max request size is sizeof(rndis_msg) + PAGE_SIZE.
+	 *
+	 * NOTE:
+	 * This is required for the large request like RSS settings.
+	 *
+	 * XXX
+	 * This is ugly and should be cleaned up once we busdma-fy
+	 * RNDIS request bits.
+	 */
 	rndis_msg			request_msg;
+	uint8_t				buf_req[PAGE_SIZE];
+
 	/* Fixme:  Poor man's semaphore. */
 	uint32_t			halt_complete_flag;
 } rndis_request;

Modified: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
==============================================================================
--- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	Fri Mar  4 05:36:53 2016	(r296378)
+++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	Fri Mar  4 06:52:11 2016	(r296379)
@@ -274,7 +274,7 @@ vmbus_channel_process_offer(hv_vmbus_cha
 	}
 }
 
-static void
+void
 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
 {
 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));


More information about the svn-src-all mailing list