svn commit: r299904 - head/sys/dev/sfxge/common

Andrew Rybchenko arybchik at FreeBSD.org
Mon May 16 06:38:53 UTC 2016


Author: arybchik
Date: Mon May 16 06:38:51 2016
New Revision: 299904
URL: https://svnweb.freebsd.org/changeset/base/299904

Log:
  sfxge(4): improve PCIe link speed and width check
  
  Perform a more accurate check of whether the PCIe bandwidth is
  sufficient for the current/supported port modes.
  
  Give a different warning if there is sufficient bandwidth to achieve
  line rate, but the link is not fast enough for optimal latency.
  
  Submitted by:   Mark Spender <mspender at solarflare.com>
  Sponsored by:   Solarflare Communications, Inc.
  MFC after:      1 week
  Differential Revision:  https://reviews.freebsd.org/D6369

Modified:
  head/sys/dev/sfxge/common/ef10_impl.h
  head/sys/dev/sfxge/common/ef10_nic.c
  head/sys/dev/sfxge/common/efx.h
  head/sys/dev/sfxge/common/efx_nic.c
  head/sys/dev/sfxge/common/hunt_nic.c
  head/sys/dev/sfxge/common/medford_nic.c
  head/sys/dev/sfxge/common/siena_nic.c

Modified: head/sys/dev/sfxge/common/ef10_impl.h
==============================================================================
--- head/sys/dev/sfxge/common/ef10_impl.h	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/ef10_impl.h	Mon May 16 06:38:51 2016	(r299904)
@@ -1035,7 +1035,13 @@ efx_mcdi_get_port_assignment(
 extern	__checkReturn	efx_rc_t
 efx_mcdi_get_port_modes(
 	__in		efx_nic_t *enp,
-	__out		uint32_t *modesp);
+	__out		uint32_t *modesp,
+	__out_opt	uint32_t *current_modep);
+
+extern	__checkReturn	efx_rc_t
+ef10_nic_get_port_mode_bandwidth(
+	__in		uint32_t port_mode,
+	__out		uint32_t *bandwidth_mbpsp);
 
 extern	__checkReturn	efx_rc_t
 efx_mcdi_get_mac_address_pf(

Modified: head/sys/dev/sfxge/common/ef10_nic.c
==============================================================================
--- head/sys/dev/sfxge/common/ef10_nic.c	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/ef10_nic.c	Mon May 16 06:38:51 2016	(r299904)
@@ -88,7 +88,8 @@ fail1:
 	__checkReturn	efx_rc_t
 efx_mcdi_get_port_modes(
 	__in		efx_nic_t *enp,
-	__out		uint32_t *modesp)
+	__out		uint32_t *modesp,
+	__out_opt	uint32_t *current_modep)
 {
 	efx_mcdi_req_t req;
 	uint8_t payload[MAX(MC_CMD_GET_PORT_MODES_IN_LEN,
@@ -113,19 +114,31 @@ efx_mcdi_get_port_modes(
 	}
 
 	/*
-	 * Require only Modes and DefaultMode fields.
-	 * (CurrentMode field was added for Medford)
+	 * Require only Modes and DefaultMode fields, unless the current mode
+	 * was requested (CurrentMode field was added for Medford).
 	 */
 	if (req.emr_out_length_used <
 	    MC_CMD_GET_PORT_MODES_OUT_CURRENT_MODE_OFST) {
 		rc = EMSGSIZE;
 		goto fail2;
 	}
+	if ((current_modep != NULL) && (req.emr_out_length_used <
+	    MC_CMD_GET_PORT_MODES_OUT_CURRENT_MODE_OFST + 4)) {
+		rc = EMSGSIZE;
+		goto fail3;
+	}
 
 	*modesp = MCDI_OUT_DWORD(req, GET_PORT_MODES_OUT_MODES);
 
+	if (current_modep != NULL) {
+		*current_modep = MCDI_OUT_DWORD(req,
+					    GET_PORT_MODES_OUT_CURRENT_MODE);
+	}
+
 	return (0);
 
+fail3:
+	EFSYS_PROBE(fail3);
 fail2:
 	EFSYS_PROBE(fail2);
 fail1:
@@ -134,6 +147,50 @@ fail1:
 	return (rc);
 }
 
+	__checkReturn	efx_rc_t
+ef10_nic_get_port_mode_bandwidth(
+	__in		uint32_t port_mode,
+	__out		uint32_t *bandwidth_mbpsp)
+{
+	uint32_t bandwidth;
+	efx_rc_t rc;
+
+	switch (port_mode) {
+	case TLV_PORT_MODE_10G:
+		bandwidth = 10000;
+		break;
+	case TLV_PORT_MODE_10G_10G:
+		bandwidth = 10000 * 2;
+		break;
+	case TLV_PORT_MODE_10G_10G_10G_10G:
+	case TLV_PORT_MODE_10G_10G_10G_10G_Q:
+	case TLV_PORT_MODE_10G_10G_10G_10G_Q2:
+		bandwidth = 10000 * 4;
+		break;
+	case TLV_PORT_MODE_40G:
+		bandwidth = 40000;
+		break;
+	case TLV_PORT_MODE_40G_40G:
+		bandwidth = 40000 * 2;
+		break;
+	case TLV_PORT_MODE_40G_10G_10G:
+	case TLV_PORT_MODE_10G_10G_40G:
+		bandwidth = 40000 + (10000 * 2);
+		break;
+	default:
+		rc = EINVAL;
+		goto fail1;
+	}
+
+	*bandwidth_mbpsp = bandwidth;
+
+	return (0);
+
+fail1:
+	EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+	return (rc);
+}
 
 static	__checkReturn		efx_rc_t
 efx_mcdi_vadaptor_alloc(
@@ -1090,7 +1147,7 @@ ef10_external_port_mapping(
 	uint32_t matches;
 	uint32_t stride = 1; /* default 1-1 mapping */
 
-	if ((rc = efx_mcdi_get_port_modes(enp, &port_modes)) != 0) {
+	if ((rc = efx_mcdi_get_port_modes(enp, &port_modes, NULL)) != 0) {
 		/* No port mode information available - use default mapping */
 		goto out;
 	}

Modified: head/sys/dev/sfxge/common/efx.h
==============================================================================
--- head/sys/dev/sfxge/common/efx.h	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/efx.h	Mon May 16 06:38:51 2016	(r299904)
@@ -173,6 +173,30 @@ extern 		void
 efx_nic_destroy(
 	__in	efx_nic_t *enp);
 
+#define	EFX_PCIE_LINK_SPEED_GEN1		1
+#define	EFX_PCIE_LINK_SPEED_GEN2		2
+#define	EFX_PCIE_LINK_SPEED_GEN3		3
+
+typedef enum efx_pcie_link_performance_e {
+	EFX_PCIE_LINK_PERFORMANCE_UNKNOWN_BANDWIDTH,
+	EFX_PCIE_LINK_PERFORMANCE_SUBOPTIMAL_BANDWIDTH,
+	EFX_PCIE_LINK_PERFORMANCE_SUBOPTIMAL_LATENCY,
+	EFX_PCIE_LINK_PERFORMANCE_OPTIMAL
+} efx_pcie_link_performance_t;
+
+extern	__checkReturn	efx_rc_t
+efx_nic_calculate_pcie_link_bandwidth(
+	__in		uint32_t pcie_link_width,
+	__in		uint32_t pcie_link_gen,
+	__out		uint32_t *bandwidth_mbpsp);
+
+extern	__checkReturn	efx_rc_t
+efx_nic_check_pcie_link_speed(
+	__in		efx_nic_t *enp,
+	__in		uint32_t pcie_link_width,
+	__in		uint32_t pcie_link_gen,
+	__out		efx_pcie_link_performance_t *resultp);
+
 #if EFSYS_OPT_MCDI
 
 #if EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD
@@ -1116,6 +1140,9 @@ typedef struct efx_nic_cfg_s {
 	uint32_t		enc_mcdi_max_payload_length;
 	/* VPD may be per-PF or global */
 	boolean_t		enc_vpd_is_global;
+	/* Minimum unidirectional bandwidth in Mb/s to max out all ports */
+	uint32_t		enc_required_pcie_bandwidth_mbps;
+	uint32_t		enc_max_pcie_link_gen;
 } efx_nic_cfg_t;
 
 #define	EFX_PCI_FUNCTION_IS_PF(_encp)	((_encp)->enc_vf == 0xffff)

Modified: head/sys/dev/sfxge/common/efx_nic.c
==============================================================================
--- head/sys/dev/sfxge/common/efx_nic.c	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/efx_nic.c	Mon May 16 06:38:51 2016	(r299904)
@@ -964,3 +964,101 @@ fail1:
 }
 
 #endif /* EFSYS_OPT_LOOPBACK */
+
+	__checkReturn	efx_rc_t
+efx_nic_calculate_pcie_link_bandwidth(
+	__in		uint32_t pcie_link_width,
+	__in		uint32_t pcie_link_gen,
+	__out		uint32_t *bandwidth_mbpsp)
+{
+	uint32_t lane_bandwidth;
+	uint32_t total_bandwidth;
+	efx_rc_t rc;
+
+	if ((pcie_link_width == 0) || (pcie_link_width > 16) ||
+	    !ISP2(pcie_link_width)) {
+		rc = EINVAL;
+		goto fail1;
+	}
+
+	switch (pcie_link_gen) {
+	case EFX_PCIE_LINK_SPEED_GEN1:
+		/* 2.5 Gb/s raw bandwidth with 8b/10b encoding */
+		lane_bandwidth = 2000;
+		break;
+	case EFX_PCIE_LINK_SPEED_GEN2:
+		/* 5.0 Gb/s raw bandwidth with 8b/10b encoding */
+		lane_bandwidth = 4000;
+		break;
+	case EFX_PCIE_LINK_SPEED_GEN3:
+		/* 8.0 Gb/s raw bandwidth with 128b/130b encoding */
+		lane_bandwidth = 7877;
+		break;
+	default:
+		rc = EINVAL;
+		goto fail2;
+	}
+
+	total_bandwidth = lane_bandwidth * pcie_link_width;
+	*bandwidth_mbpsp = total_bandwidth;
+
+	return (0);
+
+fail2:
+	EFSYS_PROBE(fail2);
+fail1:
+	EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+	return (rc);
+}
+
+
+	__checkReturn	efx_rc_t
+efx_nic_check_pcie_link_speed(
+	__in		efx_nic_t *enp,
+	__in		uint32_t pcie_link_width,
+	__in		uint32_t pcie_link_gen,
+	__out		efx_pcie_link_performance_t *resultp)
+{
+	efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
+	uint32_t bandwidth;
+	efx_pcie_link_performance_t result;
+	efx_rc_t rc;
+
+	if ((encp->enc_required_pcie_bandwidth_mbps == 0) ||
+	    (pcie_link_width == 0) || (pcie_link_width == 32) ||
+	    (pcie_link_gen == 0)) {
+		/*
+		 * No usable info on what is required and/or in use. In virtual
+		 * machines, sometimes the PCIe link width is reported as 0 or
+		 * 32, or the speed as 0.
+		 */
+		result = EFX_PCIE_LINK_PERFORMANCE_UNKNOWN_BANDWIDTH;
+		goto out;
+	}
+
+	/* Calculate the available bandwidth in megabits per second */
+	rc = efx_nic_calculate_pcie_link_bandwidth(pcie_link_width,
+					    pcie_link_gen, &bandwidth);
+	if (rc != 0)
+		goto fail1;
+
+	if (bandwidth < encp->enc_required_pcie_bandwidth_mbps) {
+		result = EFX_PCIE_LINK_PERFORMANCE_SUBOPTIMAL_BANDWIDTH;
+	} else if (pcie_link_gen < encp->enc_max_pcie_link_gen) {
+		/* The link provides enough bandwidth but not optimal latency */
+		result = EFX_PCIE_LINK_PERFORMANCE_SUBOPTIMAL_LATENCY;
+	} else {
+		result = EFX_PCIE_LINK_PERFORMANCE_OPTIMAL;
+	}
+
+out:
+	*resultp = result;
+
+	return (0);
+
+fail1:
+	EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+	return (rc);
+}

Modified: head/sys/dev/sfxge/common/hunt_nic.c
==============================================================================
--- head/sys/dev/sfxge/common/hunt_nic.c	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/hunt_nic.c	Mon May 16 06:38:51 2016	(r299904)
@@ -39,6 +39,65 @@ __FBSDID("$FreeBSD$");
 
 #if EFSYS_OPT_HUNTINGTON
 
+#include "ef10_tlv_layout.h"
+
+static	__checkReturn	efx_rc_t
+hunt_nic_get_required_pcie_bandwidth(
+	__in		efx_nic_t *enp,
+	__out		uint32_t *bandwidth_mbpsp)
+{
+	uint32_t port_modes;
+	uint32_t max_port_mode;
+	uint32_t bandwidth;
+	efx_rc_t rc;
+
+	/*
+	 * On Huntington, the firmware may not give us the current port mode, so
+	 * we need to go by the set of available port modes and assume the most
+	 * capable mode is in use.
+	 */
+
+	if ((rc = efx_mcdi_get_port_modes(enp, &port_modes, NULL)) != 0) {
+		/* No port mode info available */
+		bandwidth = 0;
+		goto out;
+	}
+
+	if (port_modes & (1 << TLV_PORT_MODE_40G_40G)) {
+		/*
+		 * This needs the full PCIe bandwidth (and could use
+		 * more) - roughly 63 Gbit/s usable for 8 lanes of Gen3.
+		 */
+		if ((rc = efx_nic_calculate_pcie_link_bandwidth(8,
+			    EFX_PCIE_LINK_SPEED_GEN3, &bandwidth)) != 0)
+			goto fail1;
+	} else {
+		if (port_modes & (1 << TLV_PORT_MODE_40G)) {
+			max_port_mode = TLV_PORT_MODE_40G;
+		} else if (port_modes & (1 << TLV_PORT_MODE_10G_10G_10G_10G)) {
+			max_port_mode = TLV_PORT_MODE_10G_10G_10G_10G;
+		} else {
+			/* Assume two 10G ports */
+			max_port_mode = TLV_PORT_MODE_10G_10G;
+		}
+
+		if ((rc = ef10_nic_get_port_mode_bandwidth(max_port_mode,
+							    &bandwidth)) != 0)
+		    goto fail2;
+	}
+
+out:
+	*bandwidth_mbpsp = bandwidth;
+
+	return (0);
+
+fail2:
+	EFSYS_PROBE(fail2);
+fail1:
+	EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+	return (rc);
+}
 
 	__checkReturn	efx_rc_t
 hunt_board_cfg(
@@ -57,6 +116,7 @@ hunt_board_cfg(
 	uint32_t flags;
 	uint32_t sysclk;
 	uint32_t base, nvec;
+	uint32_t bandwidth;
 	efx_rc_t rc;
 
 	if ((rc = efx_mcdi_get_port_assignment(enp, &port)) != 0)
@@ -286,8 +346,17 @@ hunt_board_cfg(
 	 */
 	encp->enc_tx_tso_tcp_header_offset_limit = EF10_TCP_HEADER_OFFSET_LIMIT;
 
+	if ((rc = hunt_nic_get_required_pcie_bandwidth(enp, &bandwidth)) != 0)
+		goto fail15;
+	encp->enc_required_pcie_bandwidth_mbps = bandwidth;
+
+	/* All Huntington devices have a PCIe Gen3, 8 lane connector */
+	encp->enc_max_pcie_link_gen = EFX_PCIE_LINK_SPEED_GEN3;
+
 	return (0);
 
+fail15:
+	EFSYS_PROBE(fail15);
 fail14:
 	EFSYS_PROBE(fail14);
 fail13:

Modified: head/sys/dev/sfxge/common/medford_nic.c
==============================================================================
--- head/sys/dev/sfxge/common/medford_nic.c	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/medford_nic.c	Mon May 16 06:38:51 2016	(r299904)
@@ -95,6 +95,38 @@ fail1:
 	return (rc);
 }
 
+static	__checkReturn	efx_rc_t
+medford_nic_get_required_pcie_bandwidth(
+	__in		efx_nic_t *enp,
+	__out		uint32_t *bandwidth_mbpsp)
+{
+	uint32_t port_modes;
+	uint32_t current_mode;
+	uint32_t bandwidth;
+	efx_rc_t rc;
+
+	if ((rc = efx_mcdi_get_port_modes(enp, &port_modes,
+				    &current_mode)) != 0) {
+		/* No port mode info available. */
+		bandwidth = 0;
+		goto out;
+	}
+
+	if ((rc = ef10_nic_get_port_mode_bandwidth(current_mode,
+						    &bandwidth)) != 0)
+		goto fail1;
+
+out:
+	*bandwidth_mbpsp = bandwidth;
+
+	return (0);
+
+fail1:
+	EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+	return (rc);
+}
+
 	__checkReturn	efx_rc_t
 medford_board_cfg(
 	__in		efx_nic_t *enp)
@@ -112,6 +144,7 @@ medford_board_cfg(
 	uint32_t sysclk;
 	uint32_t base, nvec;
 	uint32_t end_padding;
+	uint32_t bandwidth;
 	efx_rc_t rc;
 
 	/*
@@ -275,8 +308,16 @@ medford_board_cfg(
 	 */
 	encp->enc_vpd_is_global = B_TRUE;
 
+	rc = medford_nic_get_required_pcie_bandwidth(enp, &bandwidth);
+	if (rc != 0)
+		goto fail13;
+	encp->enc_required_pcie_bandwidth_mbps = bandwidth;
+	encp->enc_max_pcie_link_gen = EFX_PCIE_LINK_SPEED_GEN3;
+
 	return (0);
 
+fail13:
+	EFSYS_PROBE(fail13);
 fail12:
 	EFSYS_PROBE(fail12);
 fail11:

Modified: head/sys/dev/sfxge/common/siena_nic.c
==============================================================================
--- head/sys/dev/sfxge/common/siena_nic.c	Mon May 16 06:32:06 2016	(r299903)
+++ head/sys/dev/sfxge/common/siena_nic.c	Mon May 16 06:38:51 2016	(r299904)
@@ -150,6 +150,10 @@ siena_board_cfg(
 	encp->enc_fw_assisted_tso_v2_enabled = B_FALSE;
 	encp->enc_allow_set_mac_with_installed_filters = B_TRUE;
 
+	/* Siena supports two 10G ports, and 8 lanes of PCIe Gen2 */
+	encp->enc_required_pcie_bandwidth_mbps = 2 * 10000;
+	encp->enc_max_pcie_link_gen = EFX_PCIE_LINK_SPEED_GEN2;
+
 	return (0);
 
 fail2:


More information about the svn-src-head mailing list