svn commit: r301943 - in stable/10/sys/dev/hyperv: include netvsc vmbus
Sepherosa Ziehau
sephe at FreeBSD.org
Thu Jun 16 02:48:19 UTC 2016
Author: sephe
Date: Thu Jun 16 02:48:18 2016
New Revision: 301943
URL: https://svnweb.freebsd.org/changeset/base/301943
Log:
MFC 296379,296380,296381,296593,296594,296595
296379
hyperv/hn: Add multiple channel support, a.k.a. vRSS
Each channel contains one RX ring and one TX ring. And we
try to distribute the channels to different CPUs evenly.
Note: Currently we don't have enough information to extract
the RSS type and RSS hash value from the received packets.
This greatly improves the TX/RX performance for 8 virtual CPU
Hyper-V over 10Ge: it can max out 10Ge for TCP when multiple
RX/TX rings are enabled.
This almost doubles the TX/RX performance for locally connected
Hyper-Vs: was 6Gbps w/ 128 TCP streams, now 11Gbps w/ multiple
RX/TX rings enabled.
It is not enabled by default; it will be switched on after more
tests.
Collaborated with: Hongjiang Zhang <honzhan microsoft com>
MFC after: 2 weeks
Sponsored by: Microsoft OSTC
296380
hyperv/hn: Pass channel to send done callbacks.
Mainly to make the data packet send done check more stringent.
MFC after: 2 weeks
Sponsored by: Microsoft OSTC
296381
hyperv/hn: Add per-TX ring stats for # of transmitted packets
MFC after: 2 weeks
Sponsored by: Microsoft OSTC
296593
hyperv/hn: Move if_initname to an earlier place
So that functions shared w/ attach path could use if_printf().
While I'm here, remove unnecessary if_dunit and if_dname assignment.
MFC after: 1 week
Sponsored by: Microsoft OSTC
Differential Revision: https://reviews.freebsd.org/D5576
296594
hyperv/hn: Factor out hn_channel_attach
MFC after: 1 week
Sponsored by: Microsoft OSTC
Differential Revision: https://reviews.freebsd.org/D5577
296595
hyperv/hn: Make the # of TX rings configurable.
Rename the tunables to avoid confusion.
MFC after: 1 week
Sponsored by: Microsoft OSTC
Differential Revision: https://reviews.freebsd.org/D5578
Modified:
stable/10/sys/dev/hyperv/include/hyperv.h
stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
stable/10/sys/dev/hyperv/netvsc/hv_rndis.h
stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c
stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h
stable/10/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/dev/hyperv/include/hyperv.h
==============================================================================
--- stable/10/sys/dev/hyperv/include/hyperv.h Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/include/hyperv.h Thu Jun 16 02:48:18 2016 (r301943)
@@ -911,6 +911,8 @@ int hv_vmbus_channel_teardown_gpdal(
struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
+void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
+
/**
* @brief Get physical address from virtual
*/
Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c Thu Jun 16 02:48:18 2016 (r301943)
@@ -56,14 +56,14 @@ MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper
/*
* Forward declarations
*/
-static void hv_nv_on_channel_callback(void *context);
+static void hv_nv_on_channel_callback(void *xchan);
static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
static int hv_nv_connect_to_vsp(struct hv_device *device);
static void hv_nv_on_send_completion(netvsc_dev *net_dev,
- struct hv_device *device, hv_vm_packet_descriptor *pkt);
+ struct hv_device *device, struct hv_vmbus_channel *, hv_vm_packet_descriptor *pkt);
static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan,
uint64_t tid, uint32_t status);
static void hv_nv_on_receive(netvsc_dev *net_dev,
@@ -661,6 +661,34 @@ hv_nv_disconnect_from_vsp(netvsc_dev *ne
}
/*
+ * Callback handler for subchannel offer
+ * @@param context new subchannel
+ */
+static void
+hv_nv_subchan_callback(void *xchan)
+{
+ struct hv_vmbus_channel *chan = xchan;
+ netvsc_dev *net_dev;
+ uint16_t chn_index = chan->offer_msg.offer.sub_channel_index;
+ struct hv_device *device = chan->device;
+ hn_softc_t *sc = device_get_softc(device->device);
+ int ret;
+
+ net_dev = sc->net_dev;
+
+ if (chn_index >= net_dev->num_channel) {
+ /* Would this ever happen? */
+ return;
+ }
+ netvsc_subchan_callback(sc, chan);
+
+ chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
+ ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE,
+ NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0,
+ hv_nv_on_channel_callback, chan);
+}
+
+/*
* Net VSC on device add
*
* Callback when the device belonging to this driver is added
@@ -692,6 +720,7 @@ hv_nv_on_device_add(struct hv_device *de
free(chan->hv_chan_rdbuf, M_NETVSC);
goto cleanup;
}
+ chan->sc_creation_callback = hv_nv_subchan_callback;
/*
* Connect with the NetVsp
@@ -757,7 +786,8 @@ hv_nv_on_device_remove(struct hv_device
*/
static void
hv_nv_on_send_completion(netvsc_dev *net_dev,
- struct hv_device *device, hv_vm_packet_descriptor *pkt)
+ struct hv_device *device, struct hv_vmbus_channel *chan,
+ hv_vm_packet_descriptor *pkt)
{
nvsp_msg *nvsp_msg_pkt;
netvsc_packet *net_vsc_pkt;
@@ -769,7 +799,9 @@ hv_nv_on_send_completion(netvsc_dev *net
|| nvsp_msg_pkt->hdr.msg_type
== nvsp_msg_1_type_send_rx_buf_complete
|| nvsp_msg_pkt->hdr.msg_type
- == nvsp_msg_1_type_send_send_buf_complete) {
+ == nvsp_msg_1_type_send_send_buf_complete
+ || nvsp_msg_pkt->hdr.msg_type
+ == nvsp_msg5_type_subchannel) {
/* Copy the response back */
memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
sizeof(nvsp_msg));
@@ -806,7 +838,7 @@ hv_nv_on_send_completion(netvsc_dev *net
}
/* Notify the layer above us */
- net_vsc_pkt->compl.send.on_send_completion(
+ net_vsc_pkt->compl.send.on_send_completion(chan,
net_vsc_pkt->compl.send.send_completion_context);
}
@@ -963,6 +995,46 @@ retry_send_cmplt:
}
/*
+ * Net VSC receiving vRSS send table from VSP
+ */
+static void
+hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+ netvsc_dev *net_dev;
+ nvsp_msg *nvsp_msg_pkt;
+ int i;
+ uint32_t count, *table;
+
+ net_dev = hv_nv_get_inbound_net_device(device);
+ if (!net_dev)
+ return;
+
+ nvsp_msg_pkt =
+ (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+ if (nvsp_msg_pkt->hdr.msg_type !=
+ nvsp_msg5_type_send_indirection_table) {
+ printf("Netvsc: !Warning! receive msg type not "
+ "send_indirection_table. type = %d\n",
+ nvsp_msg_pkt->hdr.msg_type);
+ return;
+ }
+
+ count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count;
+ if (count != VRSS_SEND_TABLE_SIZE) {
+ printf("Netvsc: Received wrong send table size: %u\n", count);
+ return;
+ }
+
+ table = (uint32_t *)
+ ((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table +
+ nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset);
+
+ for (i = 0; i < count; i++)
+ net_dev->vrss_send_table[i] = table[i];
+}
+
+/*
* Net VSC on channel callback
*/
static void
@@ -993,11 +1065,15 @@ hv_nv_on_channel_callback(void *xchan)
desc = (hv_vm_packet_descriptor *)buffer;
switch (desc->type) {
case HV_VMBUS_PACKET_TYPE_COMPLETION:
- hv_nv_on_send_completion(net_dev, device, desc);
+ hv_nv_on_send_completion(net_dev, device,
+ chan, desc);
break;
case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
hv_nv_on_receive(net_dev, device, chan, desc);
break;
+ case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND:
+ hv_nv_send_table(device, desc);
+ break;
default:
device_printf(dev,
"hv_cb recv unknow type %d "
Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h Thu Jun 16 02:48:18 2016 (r301943)
@@ -86,6 +86,92 @@ MALLOC_DECLARE(M_NETVSC);
*/
#define NVSP_MAX_PACKETS_PER_RECEIVE 375
+/* vRSS stuff */
+#define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
+#define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
+
+#define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
+#define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
+
+struct rndis_obj_header {
+ uint8_t type;
+ uint8_t rev;
+ uint16_t size;
+} __packed;
+
+/* rndis_recv_scale_cap/cap_flag */
+#define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
+#define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000
+#define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
+#define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
+
+/* RNDIS_RECEIVE_SCALE_CAPABILITIES */
+struct rndis_recv_scale_cap {
+ struct rndis_obj_header hdr;
+ uint32_t cap_flag;
+ uint32_t num_int_msg;
+ uint32_t num_recv_que;
+ uint16_t num_indirect_tabent;
+} __packed;
+
+/* rndis_recv_scale_param flags */
+#define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
+#define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
+#define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
+#define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
+#define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
+
+/* Hash info bits */
+#define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001
+#define RNDIS_HASH_IPV4 0x00000100
+#define RNDIS_HASH_TCP_IPV4 0x00000200
+#define RNDIS_HASH_IPV6 0x00000400
+#define RNDIS_HASH_IPV6_EX 0x00000800
+#define RNDIS_HASH_TCP_IPV6 0x00001000
+#define RNDIS_HASH_TCP_IPV6_EX 0x00002000
+
+#define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
+
+#define ITAB_NUM 128
+#define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+
+/* RNDIS_RECEIVE_SCALE_PARAMETERS */
+typedef struct rndis_recv_scale_param_ {
+ struct rndis_obj_header hdr;
+
+ /* Qualifies the rest of the information */
+ uint16_t flag;
+
+ /* The base CPU number to do receive processing. not used */
+ uint16_t base_cpu_number;
+
+ /* This describes the hash function and type being enabled */
+ uint32_t hashinfo;
+
+ /* The size of indirection table array */
+ uint16_t indirect_tabsize;
+
+ /* The offset of the indirection table from the beginning of this
+ * structure
+ */
+ uint32_t indirect_taboffset;
+
+ /* The size of the hash secret key */
+ uint16_t hashkey_size;
+
+ /* The offset of the secret key from the beginning of this structure */
+ uint32_t hashkey_offset;
+
+ uint32_t processor_masks_offset;
+ uint32_t num_processor_masks;
+ uint32_t processor_masks_entry_size;
+} rndis_recv_scale_param;
typedef enum nvsp_msg_type_ {
nvsp_msg_type_none = 0,
@@ -146,6 +232,27 @@ typedef enum nvsp_msg_type_ {
nvsp_msg_2_type_alloc_chimney_handle,
nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+ nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+ /*
+ * Version 4 Messages
+ */
+ nvsp_msg4_type_send_vf_association,
+ nvsp_msg4_type_switch_data_path,
+ nvsp_msg4_type_uplink_connect_state_deprecated,
+
+ nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated,
+
+ /*
+ * Version 5 Messages
+ */
+ nvsp_msg5_type_oid_query_ex,
+ nvsp_msg5_type_oid_query_ex_comp,
+ nvsp_msg5_type_subchannel,
+ nvsp_msg5_type_send_indirection_table,
+
+ nvsp_msg5_max = nvsp_msg5_type_send_indirection_table,
} nvsp_msg_type;
typedef enum nvsp_status_ {
@@ -793,6 +900,39 @@ typedef struct nvsp_2_msg_send_vmq_rndis
uint32_t status;
} __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
+/*
+ * Version 5 messages
+ */
+enum nvsp_subchannel_operation {
+ NVSP_SUBCHANNEL_NONE = 0,
+ NVSP_SUBCHANNE_ALLOCATE,
+ NVSP_SUBCHANNE_MAX
+};
+
+typedef struct nvsp_5_subchannel_request_
+{
+ uint32_t op;
+ uint32_t num_subchannels;
+} __packed nvsp_5_subchannel_request;
+
+typedef struct nvsp_5_subchannel_complete_
+{
+ uint32_t status;
+ /* Actual number of subchannels allocated */
+ uint32_t num_subchannels;
+} __packed nvsp_5_subchannel_complete;
+
+typedef struct nvsp_5_send_indirect_table_
+{
+ /* The number of entries in the send indirection table */
+ uint32_t count;
+ /*
+ * The offset of the send indireciton table from top of
+ * this struct. The send indirection table tells which channel
+ * to put the send traffic on. Each entry is a channel number.
+ */
+ uint32_t offset;
+} __packed nvsp_5_send_indirect_table;
typedef union nvsp_1_msg_uber_ {
nvsp_1_msg_send_ndis_version send_ndis_vers;
@@ -838,11 +978,18 @@ typedef union nvsp_2_msg_uber_ {
nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
} __packed nvsp_2_msg_uber;
+typedef union nvsp_5_msg_uber_
+{
+ nvsp_5_subchannel_request subchannel_request;
+ nvsp_5_subchannel_complete subchn_complete;
+ nvsp_5_send_indirect_table send_table;
+} __packed nvsp_5_msg_uber;
typedef union nvsp_all_msgs_ {
nvsp_msg_init_uber init_msgs;
nvsp_1_msg_uber vers_1_msgs;
nvsp_2_msg_uber vers_2_msgs;
+ nvsp_5_msg_uber vers_5_msgs;
} __packed nvsp_all_msgs;
/*
@@ -883,6 +1030,7 @@ typedef struct nvsp_msg_ {
#define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024)
#define NETVSC_PACKET_SIZE PAGE_SIZE
+#define VRSS_SEND_TABLE_SIZE 16
/*
* Data types
@@ -923,10 +1071,15 @@ typedef struct netvsc_dev_ {
hv_bool_uint8_t destroy;
/* Negotiated NVSP version */
uint32_t nvsp_version;
+
+ uint32_t num_channel;
+
+ uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE];
} netvsc_dev;
+struct hv_vmbus_channel;
-typedef void (*pfn_on_send_rx_completion)(void *);
+typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *);
#define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE)
#define NETVSC_PACKET_MAXPAGE 32
@@ -1010,13 +1163,18 @@ struct hn_rx_ring {
u_long hn_csum_trusted;
u_long hn_lro_tried;
u_long hn_small_pkts;
+ u_long hn_pkts;
+
+ /* Rarely used stuffs */
+ struct sysctl_oid *hn_rx_sysctl_tree;
+ int hn_rx_flags;
} __aligned(CACHE_LINE_SIZE);
#define HN_TRUST_HCSUM_IP 0x0001
#define HN_TRUST_HCSUM_TCP 0x0002
#define HN_TRUST_HCSUM_UDP 0x0004
-struct hv_vmbus_channel;
+#define HN_RX_FLAG_ATTACHED 0x1
struct hn_tx_ring {
#ifndef HN_USE_TXDESC_BUFRING
@@ -1053,13 +1211,17 @@ struct hn_tx_ring {
u_long hn_txdma_failed;
u_long hn_tx_collapsed;
u_long hn_tx_chimney;
+ u_long hn_pkts;
/* Rarely used stuffs */
struct hn_txdesc *hn_txdesc;
bus_dma_tag_t hn_tx_rndis_dtag;
struct sysctl_oid *hn_tx_sysctl_tree;
+ int hn_tx_flags;
} __aligned(CACHE_LINE_SIZE);
+#define HN_TX_FLAG_ATTACHED 0x1
+
/*
* Device-specific softc structure
*/
@@ -1085,9 +1247,12 @@ typedef struct hn_softc {
int hn_tx_ring_cnt;
int hn_tx_ring_inuse;
struct hn_tx_ring *hn_tx_ring;
+
+ int hn_cpu;
int hn_tx_chimney_max;
struct taskqueue *hn_tx_taskq;
struct sysctl_oid *hn_tx_sysctl_tree;
+ struct sysctl_oid *hn_rx_sysctl_tree;
} hn_softc_t;
/*
Modified: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Thu Jun 16 02:48:18 2016 (r301943)
@@ -281,13 +281,16 @@ static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
&hn_use_if_start, 0, "Use if_start TX method");
-static int hn_ring_cnt = 1;
-SYSCTL_INT(_hw_hn, OID_AUTO, ring_cnt, CTLFLAG_RDTUN,
- &hn_ring_cnt, 0, "# of TX/RX rings to used");
-
-static int hn_single_tx_ring = 1;
-SYSCTL_INT(_hw_hn, OID_AUTO, single_tx_ring, CTLFLAG_RDTUN,
- &hn_single_tx_ring, 0, "Use one TX ring");
+static int hn_chan_cnt = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
+ &hn_chan_cnt, 0,
+ "# of channels to use; each channel has one RX ring and one TX ring");
+
+static int hn_tx_ring_cnt = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
+ &hn_tx_ring_cnt, 0, "# of TX rings to use");
+
+static u_int hn_cpu_index;
/*
* Forward declarations
@@ -327,6 +330,7 @@ static int hn_encap(struct hn_tx_ring *,
static void hn_create_rx_data(struct hn_softc *sc, int);
static void hn_destroy_rx_data(struct hn_softc *sc);
static void hn_set_tx_chimney_size(struct hn_softc *, int);
+static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *);
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
@@ -454,37 +458,46 @@ netvsc_attach(device_t dev)
ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ring_cnt = hn_ring_cnt;
- if (ring_cnt <= 0 || ring_cnt >= mp_ncpus)
+ /*
+ * Figure out the # of RX rings (ring_cnt) and the # of TX rings
+ * to use (tx_ring_cnt).
+ *
+ * NOTE:
+ * The # of RX rings to use is same as the # of channels to use.
+ */
+ ring_cnt = hn_chan_cnt;
+ if (ring_cnt <= 0 || ring_cnt > mp_ncpus)
ring_cnt = mp_ncpus;
- tx_ring_cnt = ring_cnt;
- if (hn_single_tx_ring || hn_use_if_start) {
- /*
- * - Explicitly asked to use single TX ring.
- * - ifnet.if_start is used; ifnet.if_start only needs
- * one TX ring.
- */
+ tx_ring_cnt = hn_tx_ring_cnt;
+ if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
+ tx_ring_cnt = ring_cnt;
+ if (hn_use_if_start) {
+ /* ifnet.if_start only needs one TX ring. */
tx_ring_cnt = 1;
}
+
+ /*
+ * Set the leader CPU for channels.
+ */
+ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
+
error = hn_create_tx_data(sc, tx_ring_cnt);
if (error)
goto failed;
-
hn_create_rx_data(sc, ring_cnt);
/*
* Associate the first TX/RX ring w/ the primary channel.
*/
chan = device_ctx->channel;
- chan->hv_chan_rxr = &sc->hn_rx_ring[0];
- chan->hv_chan_txr = &sc->hn_tx_ring[0];
- sc->hn_tx_ring[0].hn_chan = chan;
-
- if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_dunit = unit;
- ifp->if_dname = NETVSC_DEVNAME;
+ KASSERT(HV_VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
+ KASSERT(chan->offer_msg.offer.sub_channel_index == 0,
+ ("primary channel subidx %u",
+ chan->offer_msg.offer.sub_channel_index));
+ hn_channel_attach(sc, chan);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = hn_ioctl;
@@ -522,10 +535,18 @@ netvsc_attach(device_t dev)
error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt);
if (error)
goto failed;
+ KASSERT(sc->net_dev->num_channel > 0 &&
+ sc->net_dev->num_channel <= sc->hn_rx_ring_inuse,
+ ("invalid channel count %u, should be less than %d",
+ sc->net_dev->num_channel, sc->hn_rx_ring_inuse));
- /* TODO: vRSS */
- sc->hn_tx_ring_inuse = 1;
- sc->hn_rx_ring_inuse = 1;
+ /*
+ * Set the # of TX/RX rings that could be used according to
+ * the # of channels that host offered.
+ */
+ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel)
+ sc->hn_tx_ring_inuse = sc->net_dev->num_channel;
+ sc->hn_rx_ring_inuse = sc->net_dev->num_channel;
device_printf(dev, "%d TX ring, %d RX ring\n",
sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
@@ -730,7 +751,7 @@ hn_txdesc_hold(struct hn_txdesc *txd)
}
static void
-hn_tx_done(void *xpkt)
+hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt)
{
netvsc_packet *packet = xpkt;
struct hn_txdesc *txd;
@@ -740,6 +761,11 @@ hn_tx_done(void *xpkt)
packet->compl.send.send_completion_tid;
txr = txd->txr;
+ KASSERT(txr->hn_chan == chan,
+ ("channel mismatch, on channel%u, should be channel%u",
+ chan->offer_msg.offer.sub_channel_index,
+ txr->hn_chan->offer_msg.offer.sub_channel_index));
+
txr->hn_has_txeof = 1;
hn_txdesc_put(txr, txd);
}
@@ -1025,6 +1051,7 @@ again:
if (txd->m->m_flags & M_MCAST)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}
+ txr->hn_pkts++;
}
hn_txdesc_put(txr, txd);
@@ -1357,6 +1384,7 @@ skip:
*/
ifp->if_ipackets++;
+ rxr->hn_pkts++;
if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
@@ -2122,6 +2150,13 @@ hn_create_rx_data(struct hn_softc *sc, i
#endif
#endif /* INET || INET6 */
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ /* Create dev.hn.UNIT.rx sysctl tree */
+ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
+ CTLFLAG_RD, 0, "");
+
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
@@ -2149,10 +2184,27 @@ hn_create_rx_data(struct hn_softc *sc, i
rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif /* INET || INET6 */
- }
- ctx = device_get_sysctl_ctx(dev);
- child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+ if (sc->hn_rx_sysctl_tree != NULL) {
+ char name[16];
+
+ /*
+ * Create per RX ring sysctl tree:
+ * dev.hn.UNIT.rx.RINGID
+ */
+ snprintf(name, sizeof(name), "%d", i);
+ rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
+ OID_AUTO, name, CTLFLAG_RD, 0, "");
+
+ if (rxr->hn_rx_sysctl_tree != NULL) {
+ SYSCTL_ADD_ULONG(ctx,
+ SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+ OID_AUTO, "packets", CTLFLAG_RW,
+ &rxr->hn_pkts, "# of packets received");
+ }
+ }
+ }
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
CTLTYPE_U64 | CTLFLAG_RW, sc,
@@ -2419,6 +2471,9 @@ hn_create_tx_ring(struct hn_softc *sc, i
CTLFLAG_RD, &txr->hn_oactive, 0,
"over active");
}
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
+ CTLFLAG_RW, &txr->hn_pkts,
+ "# of packets transmitted");
}
}
@@ -2783,6 +2838,55 @@ hn_xmit_txeof_taskfunc(void *xtxr, int p
}
static void
+hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+ struct hn_rx_ring *rxr;
+ int idx;
+
+ idx = chan->offer_msg.offer.sub_channel_index;
+
+ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
+ ("invalid channel index %d, should > 0 && < %d",
+ idx, sc->hn_rx_ring_inuse));
+ rxr = &sc->hn_rx_ring[idx];
+ KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
+ ("RX ring %d already attached", idx));
+ rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
+
+ chan->hv_chan_rxr = rxr;
+ if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n",
+ idx, chan->offer_msg.child_rel_id);
+
+ if (idx < sc->hn_tx_ring_inuse) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
+
+ KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
+ ("TX ring %d already attached", idx));
+ txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
+
+ chan->hv_chan_txr = txr;
+ txr->hn_chan = chan;
+ if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n",
+ idx, chan->offer_msg.child_rel_id);
+ }
+
+ /* Bind channel to a proper CPU */
+ vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
+}
+
+void
+netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+
+ KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan),
+ ("subchannel callback on primary channel"));
+ KASSERT(chan->offer_msg.offer.sub_channel_index > 0,
+ ("invalid channel subidx %u",
+ chan->offer_msg.offer.sub_channel_index));
+ hn_channel_attach(sc, chan);
+}
+
+static void
hn_tx_taskq_create(void *arg __unused)
{
if (!hn_share_tx_taskq)
Modified: stable/10/sys/dev/hyperv/netvsc/hv_rndis.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_rndis.h Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/netvsc/hv_rndis.h Thu Jun 16 02:48:18 2016 (r301943)
@@ -167,6 +167,14 @@
#define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A
#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+/*
+ * For receive side scale
+ */
+/* Query only */
+#define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203
+/* Query and set */
+#define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204
+
#define RNDIS_OID_GEN_XMIT_OK 0x00020101
#define RNDIS_OID_GEN_RCV_OK 0x00020102
#define RNDIS_OID_GEN_XMIT_ERROR 0x00020103
@@ -1060,6 +1068,8 @@ struct hv_vmbus_channel;
int netvsc_recv(struct hv_vmbus_channel *chan,
netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info);
void netvsc_channel_rollup(struct hv_vmbus_channel *chan);
+void netvsc_subchan_callback(struct hn_softc *sc,
+ struct hv_vmbus_channel *chan);
void* hv_set_rppi_data(rndis_msg *rndis_mesg,
uint32_t rppi_size,
Modified: stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c Thu Jun 16 01:57:16 2016 (r301942)
+++ stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c Thu Jun 16 02:48:18 2016 (r301943)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <vm/pmap.h>
#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
@@ -69,8 +70,8 @@ static int hv_rf_set_packet_filter(rndi
static int hv_rf_init_device(rndis_device *device);
static int hv_rf_open_device(rndis_device *device);
static int hv_rf_close_device(rndis_device *device);
-static void hv_rf_on_send_request_completion(void *context);
-static void hv_rf_on_send_request_halt_completion(void *context);
+static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *, void *context);
+static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *, void *context);
int
hv_rf_send_offload_request(struct hv_device *device,
rndis_offload_params *offloads);
@@ -224,6 +225,8 @@ hv_rf_send_request(rndis_device *device,
{
int ret;
netvsc_packet *packet;
+ netvsc_dev *net_dev = device->net_dev;
+ int send_buf_section_idx;
/* Set up the packet to send it */
packet = &request->pkt;
@@ -238,6 +241,20 @@ hv_rf_send_request(rndis_device *device,
packet->page_buffers[0].offset =
(unsigned long)&request->request_msg & (PAGE_SIZE - 1);
+ if (packet->page_buffers[0].offset +
+ packet->page_buffers[0].length > PAGE_SIZE) {
+ packet->page_buf_count = 2;
+ packet->page_buffers[0].length =
+ PAGE_SIZE - packet->page_buffers[0].offset;
+ packet->page_buffers[1].pfn =
+ hv_get_phys_addr((char*)&request->request_msg +
+ packet->page_buffers[0].length) >> PAGE_SHIFT;
+ packet->page_buffers[1].offset = 0;
+ packet->page_buffers[1].length =
+ request->request_msg.msg_len -
+ packet->page_buffers[0].length;
+ }
+
packet->compl.send.send_completion_context = request; /* packet */
if (message_type != REMOTE_NDIS_HALT_MSG) {
packet->compl.send.on_send_completion =
@@ -247,10 +264,25 @@ hv_rf_send_request(rndis_device *device,
hv_rf_on_send_request_halt_completion;
}
packet->compl.send.send_completion_tid = (unsigned long)device;
- packet->send_buf_section_idx =
- NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
+ if (packet->tot_data_buf_len < net_dev->send_section_size) {
+ send_buf_section_idx = hv_nv_get_next_send_section(net_dev);
+ if (send_buf_section_idx !=
+ NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
+ char *dest = ((char *)net_dev->send_buf +
+ send_buf_section_idx * net_dev->send_section_size);
+
+ memcpy(dest, &request->request_msg, request->request_msg.msg_len);
+ packet->send_buf_section_idx = send_buf_section_idx;
+ packet->send_buf_section_size = packet->tot_data_buf_len;
+ packet->page_buf_count = 0;
+ goto sendit;
+ }
+ /* Failed to allocate chimney send buffer; move on */
+ }
+ packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
packet->send_buf_section_size = 0;
+sendit:
ret = hv_nv_on_send(device->net_dev->dev->channel, packet);
return (ret);
@@ -528,6 +560,19 @@ hv_rf_query_device(rndis_device *device,
query->info_buffer_length = 0;
query->device_vc_handle = 0;
+ if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) {
+ struct rndis_recv_scale_cap *cap;
+
+ request->request_msg.msg_len +=
+ sizeof(struct rndis_recv_scale_cap);
+ query->info_buffer_length = sizeof(struct rndis_recv_scale_cap);
+ cap = (struct rndis_recv_scale_cap *)((unsigned long)query +
+ query->info_buffer_offset);
+ cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+ cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+ cap->hdr.size = sizeof(struct rndis_recv_scale_cap);
+ }
+
ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
if (ret != 0) {
/* Fixme: printf added */
@@ -582,6 +627,114 @@ hv_rf_query_device_link_status(rndis_dev
RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size));
}
+static uint8_t netvsc_hash_key[HASH_KEYLEN] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+/*
+ * RNDIS set vRSS parameters
+ */
+static int
+hv_rf_set_rss_param(rndis_device *device, int num_queue)
+{
+ rndis_request *request;
+ rndis_set_request *set;
+ rndis_set_complete *set_complete;
+ rndis_recv_scale_param *rssp;
+ uint32_t extlen = sizeof(rndis_recv_scale_param) +
+ (4 * ITAB_NUM) + HASH_KEYLEN;
+ uint32_t *itab, status;
+ uint8_t *keyp;
+ int i, ret;
+
+
+ request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
+ RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen);
+ if (request == NULL) {
+ if (bootverbose)
+ printf("Netvsc: No memory to set vRSS parameters.\n");
+ ret = -1;
+ goto cleanup;
+ }
+
+ set = &request->request_msg.msg.set_request;
+ set->oid = RNDIS_OID_GEN_RSS_PARAMETERS;
+ set->info_buffer_length = extlen;
+ set->info_buffer_offset = sizeof(rndis_set_request);
+ set->device_vc_handle = 0;
+
+ /* Fill out the rssp parameter structure */
+ rssp = (rndis_recv_scale_param *)(set + 1);
+ rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS;
+ rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+ rssp->hdr.size = sizeof(rndis_recv_scale_param);
+ rssp->flag = 0;
+ rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 |
+ RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6;
+ rssp->indirect_tabsize = 4 * ITAB_NUM;
+ rssp->indirect_taboffset = sizeof(rndis_recv_scale_param);
+ rssp->hashkey_size = HASH_KEYLEN;
+ rssp->hashkey_offset = rssp->indirect_taboffset +
+ rssp->indirect_tabsize;
+
+ /* Set indirection table entries */
+ itab = (uint32_t *)(rssp + 1);
+ for (i = 0; i < ITAB_NUM; i++)
+ itab[i] = i % num_queue;
+
+ /* Set hash key values */
+ keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset);
+ for (i = 0; i < HASH_KEYLEN; i++)
+ keyp[i] = netvsc_hash_key[i];
+
+ ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ /*
+ * Wait for the response from the host. Another thread will signal
+ * us when the response has arrived. In the failure case,
+ * sema_timedwait() returns a non-zero status after waiting 5 seconds.
+ */
+ ret = sema_timedwait(&request->wait_sema, 5 * hz);
+ if (ret == 0) {
+ /* Response received, check status */
+ set_complete = &request->response_msg.msg.set_complete;
+ status = set_complete->status;
+ if (status != RNDIS_STATUS_SUCCESS) {
+ /* Bad response status, return error */
+ if (bootverbose)
+ printf("Netvsc: Failed to set vRSS "
+ "parameters.\n");
+ ret = -2;
+ } else {
+ if (bootverbose)
+ printf("Netvsc: Successfully set vRSS "
+ "parameters.\n");
+ }
+ } else {
+ /*
+ * We cannot deallocate the request since we may still
+ * receive a send completion for it.
+ */
+ printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n",
+ request->request_msg.msg.init_request.request_id, ret);
+ goto exit;
+ }
+
+cleanup:
+ if (request != NULL) {
+ hv_put_rndis_request(device, request);
+ }
+exit:
+ return (ret);
+}
+
/*
* RNDIS filter set packet filter
* Sends an rndis request with the new filter, then waits for a response
@@ -817,12 +970,15 @@ hv_rf_close_device(rndis_device *device)
*/
int
hv_rf_on_device_add(struct hv_device *device, void *additl_info,
- int nchan __unused)
+ int nchan)
{
int ret;
netvsc_dev *net_dev;
rndis_device *rndis_dev;
+ nvsp_msg *init_pkt;
rndis_offload_params offloads;
+ struct rndis_recv_scale_cap rsscaps;
+ uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap);
netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;
device_t dev = device->device;
@@ -888,6 +1044,67 @@ hv_rf_on_device_add(struct hv_device *de
dev_info->link_state = rndis_dev->link_status;
+ net_dev->num_channel = 1;
+ if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1)
+ return (0);
+
+ memset(&rsscaps, 0, rsscaps_size);
+ ret = hv_rf_query_device(rndis_dev,
+ RNDIS_OID_GEN_RSS_CAPABILITIES,
+ &rsscaps, &rsscaps_size);
+ if ((ret != 0) || (rsscaps.num_recv_que < 2)) {
+ device_printf(dev, "hv_rf_query_device failed or "
+ "rsscaps.num_recv_que < 2 \n");
+ goto out;
+ }
+ device_printf(dev, "channel, offered %u, requested %d\n",
+ rsscaps.num_recv_que, nchan);
+ if (nchan > rsscaps.num_recv_que)
+ nchan = rsscaps.num_recv_que;
+ net_dev->num_channel = nchan;
+
+ if (net_dev->num_channel == 1) {
+ device_printf(dev, "net_dev->num_channel == 1 under VRSS\n");
+ goto out;
+ }
+
+ /* request host to create sub channels */
+ init_pkt = &net_dev->channel_init_packet;
+ memset(init_pkt, 0, sizeof(nvsp_msg));
+
+ init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel;
+ init_pkt->msgs.vers_5_msgs.subchannel_request.op =
+ NVSP_SUBCHANNE_ALLOCATE;
+ init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels =
+ net_dev->num_channel - 1;
+
+ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+ sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+ HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret != 0) {
+ device_printf(dev, "Fail to allocate subchannel\n");
+ goto out;
+ }
+
+ sema_wait(&net_dev->channel_init_sema);
+
+ if (init_pkt->msgs.vers_5_msgs.subchn_complete.status !=
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list