svn commit: r331452 - in head/sys: conf dev/mlx5 dev/mlx5/mlx5_core modules/mlx5

Hans Petter Selasky hselasky at FreeBSD.org
Fri Mar 23 18:32:04 UTC 2018


Author: hselasky
Date: Fri Mar 23 18:32:03 2018
New Revision: 331452
URL: https://svnweb.freebsd.org/changeset/base/331452

Log:
  Add mutual exclusion mechanism for software reset of firmware in mlx5core.
  
  Since the FW can be shared between PCI functions, it is common for
  more than one health poll to detect a failure, which can lead to
  multiple resets.
  
  The solution is to use a FW locking mechanism using semaphore space to
  provide a way to synchronize between functions. The FW semaphore is
  acquired via config cycle access. First the VSEC gateway must be
  acquired, then the semaphore can be locked by writing a value to it
  and confirming it is locked by reading the same value back. The process
  is the same to free the semaphore, except the value written should be
  zero.
  
  Submitted by:	slavash@
  MFC after:	1 week
  Sponsored by:	Mellanox Technologies

Added:
  head/sys/dev/mlx5/mlx5_core/mlx5_crspace.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/mlx5/driver.h
  head/sys/dev/mlx5/mlx5_core/mlx5_core.h
  head/sys/dev/mlx5/mlx5_core/mlx5_health.c
  head/sys/dev/mlx5/mlx5_core/mlx5_main.c
  head/sys/modules/mlx5/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/conf/files	Fri Mar 23 18:32:03 2018	(r331452)
@@ -4732,6 +4732,8 @@ dev/mlx5/mlx5_core/mlx5_cmd.c			optional mlx5 pci	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_cq.c			optional mlx5 pci	\
 	compile-with "${OFED_C}"
+dev/mlx5/mlx5_core/mlx5_crspace.c		optional mlx5 pci	\
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_diagnostics.c		optional mlx5 pci	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_eq.c			optional mlx5 pci	\

Modified: head/sys/dev/mlx5/driver.h
==============================================================================
--- head/sys/dev/mlx5/driver.h	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/dev/mlx5/driver.h	Fri Mar 23 18:32:03 2018	(r331452)
@@ -651,6 +651,7 @@ struct mlx5_core_dev {
 	struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
 	u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER];
 	struct mlx5_dump_data	*dump_data;
+	u32			vsec_addr;
 };
 
 enum {

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_core.h
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_core.h	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_core.h	Fri Mar 23 18:32:03 2018	(r331452)
@@ -64,6 +64,16 @@ enum {
 	MLX5_CMD_TIME, /* print command execution time */
 };
 
+enum mlx5_semaphore_space_address {
+	MLX5_SEMAPHORE_SW_RESET		= 0x20,
+};
+
+enum {
+	UNLOCK = 0,
+	LOCK = 1,
+	CAP_ID = 0x9,
+};
+
 struct mlx5_core_dev;
 
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
@@ -95,4 +105,8 @@ struct mlx5_crspace_regmap {
 
 extern struct pci_driver mlx5_core_driver;
 
+void mlx5_vsec_init(struct mlx5_core_dev *dev);
+int mlx5_pciconf_cap9_sem(struct mlx5_core_dev *dev, int state);
+int mlx5_pciconf_set_sem_addr_space(struct mlx5_core_dev *dev,
+				    u32 sem_space_address, int state);
 #endif /* __MLX5_CORE_H__ */

Added: head/sys/dev/mlx5/mlx5_core/mlx5_crspace.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_crspace.c	Fri Mar 23 18:32:03 2018	(r331452)
@@ -0,0 +1,248 @@
+/*-
+ * Copyright (c) 2013-2018, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <dev/mlx5/driver.h>
+#include "mlx5_core.h"
+
+enum {
+	PCI_CTRL_OFFSET = 0x4,
+	PCI_COUNTER_OFFSET = 0x8,
+	PCI_SEMAPHORE_OFFSET = 0xc,
+
+	PCI_ADDR_OFFSET = 0x10,
+	PCI_DATA_OFFSET = 0x14,
+
+	PCI_FLAG_BIT_OFFS = 31,
+	PCI_SPACE_BIT_OFFS = 0,
+	PCI_SPACE_BIT_LEN = 16,
+	PCI_SIZE_VLD_BIT_OFFS = 28,
+	PCI_SIZE_VLD_BIT_LEN = 1,
+	PCI_STATUS_BIT_OFFS = 29,
+	PCI_STATUS_BIT_LEN = 3,
+};
+
+enum {
+	IFC_MAX_RETRIES = 2048
+};
+
+#define MLX5_EXTRACT_C(source, offset, size)	\
+	((((unsigned)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len)		\
+	(((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size)			\
+	((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size)		\
+	(MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len)  \
+	((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+	 ((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len)	\
+	(((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
+
+static int mlx5_pciconf_wait_on_flag(struct mlx5_core_dev *dev,
+				     u8 expected_val)
+{
+	int retries = 0;
+	u32 flag;
+
+	for(;;) {
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				      PCI_ADDR_OFFSET, &flag);
+		flag = MLX5_EXTRACT(flag, PCI_FLAG_BIT_OFFS, 1);
+		if (flag == expected_val)
+			return (0);
+		retries++;
+		if (retries > IFC_MAX_RETRIES)
+			return (-EBUSY);
+		if ((retries & 0xf) == 0)
+			usleep_range(1000, 2000);
+	}
+}
+
+static int mlx5_pciconf_read(struct mlx5_core_dev *dev,
+			     unsigned int offset, u32 *data)
+{
+	u32 address;
+	int ret;
+
+	if (MLX5_EXTRACT(offset, 31, 1))
+		return -EINVAL;
+	address = MLX5_MERGE(offset, 0, PCI_FLAG_BIT_OFFS, 1);
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+			       PCI_ADDR_OFFSET, address);
+	ret = mlx5_pciconf_wait_on_flag(dev, 1);
+	if (ret)
+		return (ret);
+	return pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				     PCI_DATA_OFFSET, data);
+}
+
+static int mlx5_pciconf_write(struct mlx5_core_dev *dev,
+			      unsigned int offset, u32 data)
+{
+	u32 address;
+
+	if (MLX5_EXTRACT(offset, 31, 1))
+		return -EINVAL;
+
+	/* Set flag to 0x1 */
+	address = MLX5_MERGE(offset, 1, PCI_FLAG_BIT_OFFS, 1);
+
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+			       PCI_DATA_OFFSET, data);
+
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+			       PCI_ADDR_OFFSET, address);
+
+	/* Wait for the flag to be cleared */
+	return mlx5_pciconf_wait_on_flag(dev, 0);
+
+}
+
+int mlx5_pciconf_cap9_sem(struct mlx5_core_dev *dev, int state)
+{
+	u32 counter = 0;
+	int retries = 0;
+	u32 lock_val;
+
+	if (state == UNLOCK) {
+		pci_write_config_dword(dev->pdev, dev->vsec_addr +
+				       PCI_SEMAPHORE_OFFSET, 0);
+		return (0);
+	}
+	do {
+		if (retries > IFC_MAX_RETRIES * 10)
+			return -EBUSY;
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				      PCI_SEMAPHORE_OFFSET, &lock_val);
+		if (lock_val != 0) {
+			retries++;
+			if (retries > IFC_MAX_RETRIES * 10)
+				return -EBUSY;
+			usleep_range(1000, 2000);
+			continue;
+		}
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				      PCI_COUNTER_OFFSET, &counter);
+		pci_write_config_dword(dev->pdev, dev->vsec_addr +
+				       PCI_SEMAPHORE_OFFSET, counter);
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				      PCI_SEMAPHORE_OFFSET, &lock_val);
+		retries++;
+	} while (counter != lock_val);
+	return 0;
+}
+
+#define MLX5_PROTECTED_CR_SPACE_DOMAIN 0x6
+static int mlx5_pciconf_set_addr_space(struct mlx5_core_dev *dev,
+				       u16 space)
+{
+	u32 val;
+
+	pci_read_config_dword(dev->pdev, dev->vsec_addr +
+			      PCI_CTRL_OFFSET, &val);
+
+	val = MLX5_MERGE(val, space, PCI_SPACE_BIT_OFFS,
+			 PCI_SPACE_BIT_LEN);
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+			       PCI_CTRL_OFFSET, val);
+
+	pci_read_config_dword(dev->pdev, dev->vsec_addr +
+			      PCI_CTRL_OFFSET, &val);
+
+	if (MLX5_EXTRACT(val, PCI_STATUS_BIT_OFFS,
+			 PCI_STATUS_BIT_LEN) == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+#define MLX5_CR_SPACE_DOMAIN 0x2
+static int mlx5_get_vendor_cap_addr(struct mlx5_core_dev *dev)
+{
+	int vend_cap;
+	int ret;
+
+	vend_cap = pci_find_capability(dev->pdev, CAP_ID);
+	if (!vend_cap)
+		return 0;
+	dev->vsec_addr = vend_cap;
+	ret = mlx5_pciconf_cap9_sem(dev, LOCK);
+	if (ret) {
+		mlx5_core_warn(dev,
+			       "pciconf_cap9_sem locking failure\n");
+		return 0;
+	}
+	if (mlx5_pciconf_set_addr_space(dev, MLX5_CR_SPACE_DOMAIN))
+		vend_cap = 0;
+	ret = mlx5_pciconf_cap9_sem(dev, UNLOCK);
+	if (ret)
+		mlx5_core_warn(dev,
+			       "pciconf_cap9_sem unlocking failure\n");
+	return vend_cap;
+}
+
+#define MLX5_SEMAPHORE_SPACE_DOMAIN 0xA
+int mlx5_pciconf_set_sem_addr_space(struct mlx5_core_dev *dev,
+				    u32 sem_space_address, int state)
+{
+	u32 data, id = 0;
+	int ret;
+
+	ret = mlx5_pciconf_set_addr_space(dev,
+					  MLX5_SEMAPHORE_SPACE_DOMAIN);
+	if (ret)
+		return (ret);
+
+	if (state == LOCK)
+		/* Get a unique ID based on the counter */
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+				      PCI_COUNTER_OFFSET, &id);
+
+	/* Try to modify lock */
+	ret = mlx5_pciconf_write(dev, sem_space_address, id);
+	if (ret)
+		return (ret);
+
+	/* Verify lock was modified */
+	ret = mlx5_pciconf_read(dev, sem_space_address, &data);
+	if (ret)
+		return -EINVAL;
+
+	if (data != id)
+		return -EBUSY;
+
+	return 0;
+}
+
+void mlx5_vsec_init(struct mlx5_core_dev *dev)
+{
+	dev->vsec_addr = mlx5_get_vendor_cap_addr(dev);
+}
+

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_health.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_health.c	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_health.c	Fri Mar 23 18:32:03 2018	(r331452)
@@ -48,6 +48,8 @@ enum {
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
 	MLX5_DROP_NEW_RECOVERY_WORK,
+	MLX5_SKIP_SW_RESET,
+	MLX5_SW_RESET_SEM_LOCKED,
 };
 
 enum  {
@@ -59,6 +61,33 @@ enum  {
 	MLX5_SENSOR_FW_SYND_RFR		= 5,
 };
 
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, int state)
+{
+	int ret, err;
+
+	/* Lock GW access */
+	ret = mlx5_pciconf_cap9_sem(dev, LOCK);
+	if (ret) {
+		mlx5_core_warn(dev, "Timed out locking gateway %d, %d\n", state, ret);
+		return ret;
+	}
+
+	ret = mlx5_pciconf_set_sem_addr_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
+	if (ret && state == LOCK) {
+		if (ret == -EBUSY)
+			mlx5_core_dbg(dev, "SW reset FW semaphore already locked, another function will handle the reset\n");
+		else
+			mlx5_core_warn(dev, "SW reset semaphore lock return %d\n", ret);
+	}
+
+	/* Unlock GW access */
+	err = mlx5_pciconf_cap9_sem(dev, UNLOCK);
+	if (err)
+		mlx5_core_warn(dev, "Timed out unlocking gateway: state %d, err %d\n", state, err);
+
+	return ret;
+}
+
 static u8 get_nic_mode(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
@@ -138,6 +167,7 @@ static void reset_fw_if_needed(struct mlx5_core_dev *d
 {
 	bool supported = (ioread32be(&dev->iseg->initializing) >>
 			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+	struct mlx5_core_health *health = &dev->priv.health;
 	u32 cmdq_addr, fatal_error;
 
 	if (!supported)
@@ -151,7 +181,8 @@ static void reset_fw_if_needed(struct mlx5_core_dev *d
 	fatal_error = check_fatal_sensors(dev);
 	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
 	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
-	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+	    fatal_error == MLX5_SENSOR_NIC_SW_RESET ||
+	    test_bit(MLX5_SKIP_SW_RESET, &health->flags)) {
 		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
 		return;
 	}
@@ -223,6 +254,7 @@ static void health_recover(struct work_struct *work)
 	struct delayed_work *dwork;
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
+	bool recover = true;
 	u8 nic_mode;
 
 	dwork = container_of(work, struct delayed_work, work);
@@ -232,7 +264,8 @@ static void health_recover(struct work_struct *work)
 
 	if (sensor_pci_no_comm(dev)) {
 		dev_err(&dev->pdev->dev, "health recovery flow aborted, PCI reads still not working\n");
-		return;
+		recover = false;
+		goto clear_sem;
 	}
 
 	nic_mode = get_nic_mode(dev);
@@ -245,11 +278,21 @@ static void health_recover(struct work_struct *work)
 	if (nic_mode != MLX5_NIC_IFC_DISABLED) {
 		dev_err(&dev->pdev->dev, "health recovery flow aborted, unexpected NIC IFC mode %d.\n",
 			nic_mode);
-		return;
+		recover = false;
 	}
 
-	dev_err(&dev->pdev->dev, "starting health recovery flow\n");
-	mlx5_recover_device(dev);
+clear_sem:
+	if (test_and_clear_bit(MLX5_SW_RESET_SEM_LOCKED, &health->flags)) {
+		mlx5_core_dbg(dev, "Unlocking FW reset semaphore\n");
+		lock_sem_sw_reset(dev, UNLOCK);
+	}
+
+	test_and_clear_bit(MLX5_SKIP_SW_RESET, &health->flags);
+
+	if (recover) {
+		dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+		mlx5_recover_device(dev);
+	}
 }
 
 /* How much time to wait until health resetting the driver (in msecs) */
@@ -269,10 +312,29 @@ static void health_care(struct work_struct *work)
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
 	unsigned long flags;
+	int ret;
 
 	health = container_of(work, struct mlx5_core_health, work);
 	priv = container_of(health, struct mlx5_priv, health);
 	dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	if (mlx5_core_is_pf(dev)) {
+		ret = lock_sem_sw_reset(dev, LOCK);
+		if (!ret) {
+			mlx5_core_warn(dev, "Locked FW reset semaphore\n");
+			set_bit(MLX5_SW_RESET_SEM_LOCKED, &health->flags);
+		}
+		else if (ret == -EBUSY) {
+			/* sw reset will be skipped only in case we detect the
+			 * semaphore was already taken. In case of an error
+			 * while taking the semaphore we prefer to issue a
+			 * reset since longer cr-dump time and multiple resets
+			 * are better than a stuck fw.
+			 */
+			set_bit(MLX5_SKIP_SW_RESET, &health->flags);
+		}
+	}
+
 	mlx5_core_warn(dev, "handling bad device here\n");
 	mlx5_handle_bad_state(dev);
 	recover_delay = msecs_to_jiffies(get_recovery_delay(dev));

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_main.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_main.c	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_main.c	Fri Mar 23 18:32:03 2018	(r331452)
@@ -873,6 +873,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, s
 	struct pci_dev *pdev = dev->pdev;
 	int err;
 
+	mlx5_vsec_init(dev);
+
 	err = mlx5_query_hca_caps(dev);
 	if (err) {
 		dev_err(&pdev->dev, "query hca failed\n");

Modified: head/sys/modules/mlx5/Makefile
==============================================================================
--- head/sys/modules/mlx5/Makefile	Fri Mar 23 18:24:09 2018	(r331451)
+++ head/sys/modules/mlx5/Makefile	Fri Mar 23 18:32:03 2018	(r331452)
@@ -6,6 +6,7 @@ SRCS= \
 mlx5_alloc.c \
 mlx5_cmd.c \
 mlx5_cq.c \
+mlx5_crspace.c \
 mlx5_diagnostics.c \
 mlx5_eq.c \
 mlx5_fs_cmd.c \


More information about the svn-src-head mailing list