svn commit: r347306 - in head/sys/dev/mlx5: . mlx5_core

Hans Petter Selasky hselasky at FreeBSD.org
Wed May 8 10:58:09 UTC 2019


Author: hselasky
Date: Wed May  8 10:58:06 2019
New Revision: 347306
URL: https://svnweb.freebsd.org/changeset/base/347306

Log:
  Implement reading PCI power status in mlx5core.
  
  Implement a watchdog as part of the health care subsystem which
  reads the PCI power status during startup and upon the PCI
  power status change event and stores it in the core device
  structure. This value is then exported to user-space via a
  read-only SYSCTL. A dmesg print has been added to inform
  the admin about the PCI power status.
  
  MFC after:	3 days
  Sponsored by:	Mellanox Technologies

Modified:
  head/sys/dev/mlx5/device.h
  head/sys/dev/mlx5/driver.h
  head/sys/dev/mlx5/mlx5_core/mlx5_eq.c
  head/sys/dev/mlx5/mlx5_core/mlx5_health.c
  head/sys/dev/mlx5/mlx5_core/mlx5_main.c
  head/sys/dev/mlx5/mlx5_ifc.h

Modified: head/sys/dev/mlx5/device.h
==============================================================================
--- head/sys/dev/mlx5/device.h	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/device.h	Wed May  8 10:58:06 2019	(r347306)
@@ -1215,6 +1215,7 @@ static inline int mlx5_get_cqe_format(const struct mlx
 
 enum {
 	MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
+	MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
 };
 
 /* 8 regular priorities + 1 for multicast */

Modified: head/sys/dev/mlx5/driver.h
==============================================================================
--- head/sys/dev/mlx5/driver.h	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/driver.h	Wed May  8 10:58:06 2019	(r347306)
@@ -506,6 +506,7 @@ struct mlx5_core_health {
 	int				miss_counter;
 	u32				fatal_error;
 	struct workqueue_struct	       *wq_watchdog;
+	struct work_struct		work_watchdog;
 	/* wq spinlock to synchronize draining */
 	spinlock_t			wq_lock;
 	struct workqueue_struct	       *wq;
@@ -705,6 +706,8 @@ struct mlx5_core_dev {
 
 	struct sysctl_ctx_list	sysctl_ctx;
 	int			msix_eqvec;
+	int			pwr_status;
+	int			pwr_value;
 
 	struct {
 		struct mlx5_rsvd_gids	reserved_gids;
@@ -955,6 +958,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, 
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+void mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev);
 
 #define	mlx5_buf_alloc_node(dev, size, direct, buf, node) \
 	mlx5_buf_alloc(dev, size, direct, buf)
@@ -1089,6 +1093,8 @@ int mlx5_vsc_write(struct mlx5_core_dev *mdev, u32 add
 int mlx5_vsc_read(struct mlx5_core_dev *mdev, u32 addr, u32 *data);
 int mlx5_vsc_lock_addr_space(struct mlx5_core_dev *mdev, u32 addr);
 int mlx5_vsc_unlock_addr_space(struct mlx5_core_dev *mdev, u32 addr);
+int mlx5_pci_read_power_status(struct mlx5_core_dev *mdev,
+			       u16 *p_power, u8 *p_status);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_eq.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_eq.c	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_eq.c	Wed May  8 10:58:06 2019	(r347306)
@@ -561,6 +561,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, temp_warn_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
 
+	if (MLX5_CAP_GEN(dev, general_notification_event)) {
+		async_event_mask |= (1ull <<
+		    MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT);
+	}
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
@@ -716,14 +721,17 @@ static void mlx5_port_general_notification_event(struc
 						 struct mlx5_eqe *eqe)
 {
 	u8 port = (eqe->data.port.port >> 4) & 0xf;
-	u32 rqn = 0;
-	struct mlx5_eqe_general_notification_event *general_event = NULL;
+	u32 rqn;
+	struct mlx5_eqe_general_notification_event *general_event;
 
 	switch (eqe->sub_type) {
 	case MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT:
 		general_event = &eqe->data.general_notifications;
 		rqn = be32_to_cpu(general_event->rq_user_index_delay_drop) &
 			  0xffffff;
+		break;
+	case MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT:
+		mlx5_trigger_health_watchdog(dev);
 		break;
 	default:
 		mlx5_core_warn(dev,

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_health.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_health.c	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_health.c	Wed May  8 10:58:06 2019	(r347306)
@@ -41,6 +41,7 @@
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
 	MLX5_DROP_NEW_RECOVERY_WORK,
+	MLX5_DROP_NEW_WATCHDOG_WORK,
 };
 
 enum  {
@@ -506,6 +507,66 @@ static void print_health_info(struct mlx5_core_dev *de
 	printf("mlx5_core: INFO: ""raw fw_ver 0x%08x\n", fw);
 }
 
+static void health_watchdog(struct work_struct *work)
+{
+	struct mlx5_core_dev *dev;
+	u16 power;
+	u8 status;
+	int err;
+
+	dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog);
+
+	if (!MLX5_CAP_GEN(dev, mcam_reg) ||
+	    !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power))
+		return;
+
+	err = mlx5_pci_read_power_status(dev, &power, &status);
+	if (err < 0) {
+		mlx5_core_warn(dev, "Failed reading power status: %d\n", err);
+		return;
+	}
+
+	dev->pwr_value = power;
+
+	if (dev->pwr_status != status) {
+		device_t bsddev = dev->pdev->dev.bsddev;
+
+		switch (status) {
+		case 0:
+			dev->pwr_status = status;
+			device_printf(bsddev, "PCI power is not published by the PCIe slot.\n");
+			break;
+		case 1:
+			dev->pwr_status = status;
+			device_printf(bsddev, "PCIe slot advertised sufficient power (%uW).\n", power);
+			break;
+		case 2:
+			dev->pwr_status = status;
+			device_printf(bsddev, "WARN: Detected insufficient power on the PCIe slot (%uW).\n", power);
+			break;
+		default:
+			dev->pwr_status = 0;
+			device_printf(bsddev, "WARN: Unknown power state detected(%d).\n", status);
+			break;
+		}
+	}
+}
+
+void
+mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
+
+	spin_lock_irqsave(&health->wq_lock, flags);
+	if (!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags))
+		queue_work(health->wq_watchdog, &health->work_watchdog);
+	else
+		dev_err(&dev->pdev->dev,
+			"scheduling watchdog is not permitted at this stage\n");
+	spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
 static void poll_health(unsigned long data)
 {
 	struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
@@ -516,9 +577,6 @@ static void poll_health(unsigned long data)
 	if (dev->state != MLX5_DEVICE_STATE_UP)
 		return;
 
-	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-		goto out;
-
 	count = ioread32be(health->health_counter);
 	if (count == health->prev)
 		++health->miss_counter;
@@ -540,7 +598,6 @@ static void poll_health(unsigned long data)
 		mlx5_trigger_health_work(dev);
 	}
 
-out:
 	mod_timer(&health->timer, get_next_poll_jiffies());
 }
 
@@ -552,12 +609,16 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	health->fatal_error = MLX5_SENSOR_NO_ERR;
 	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 	clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+	clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
 	setup_timer(&health->timer, poll_health, (unsigned long)dev);
 	mod_timer(&health->timer,
 		  round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL));
+
+	/* do initial PCI power state readout */
+	mlx5_trigger_health_watchdog(dev);
 }
 
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
@@ -569,6 +630,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, 
 		spin_lock_irqsave(&health->wq_lock, flags);
 		set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 		set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+		set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
 		spin_unlock_irqrestore(&health->wq_lock, flags);
 	}
 
@@ -583,9 +645,11 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+	set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 	cancel_delayed_work_sync(&health->recover_work);
 	cancel_work_sync(&health->work);
+	cancel_work_sync(&health->work_watchdog);
 }
 
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
@@ -628,6 +692,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 
 	spin_lock_init(&health->wq_lock);
 	INIT_WORK(&health->work, health_care);
+	INIT_WORK(&health->work_watchdog, health_watchdog);
 	INIT_DELAYED_WORK(&health->recover_work, health_recover);
 
 	return 0;

Modified: head/sys/dev/mlx5/mlx5_core/mlx5_main.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_main.c	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_main.c	Wed May  8 10:58:06 2019	(r347306)
@@ -197,6 +197,21 @@ static int set_dma_caps(struct pci_dev *pdev)
 	return err;
 }
 
+int mlx5_pci_read_power_status(struct mlx5_core_dev *dev,
+			       u16 *p_power, u8 *p_status)
+{
+	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {};
+	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {};
+	int err;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+	    MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0);
+
+	*p_status = MLX5_GET(mpein_reg, out, pwr_status);
+	*p_power = MLX5_GET(mpein_reg, out, pci_power);
+	return err;
+}
+
 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
 {
 	struct pci_dev *pdev = dev->pdev;
@@ -1273,6 +1288,14 @@ static int init_one(struct pci_dev *pdev,
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
 	    OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
 	    "Maximum number of MSIX event queue vectors, if set");
+	SYSCTL_ADD_INT(&dev->sysctl_ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
+	    OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0,
+	    "0:Invalid 1:Sufficient 2:Insufficient");
+	SYSCTL_ADD_INT(&dev->sysctl_ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
+	    OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
+	    "Current power value in Watts");
 
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);

Modified: head/sys/dev/mlx5/mlx5_ifc.h
==============================================================================
--- head/sys/dev/mlx5/mlx5_ifc.h	Wed May  8 10:57:37 2019	(r347305)
+++ head/sys/dev/mlx5/mlx5_ifc.h	Wed May  8 10:58:06 2019	(r347306)
@@ -8640,8 +8640,9 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7f];
-
+	u8         reserved_at_0[0x6e];
+	u8         pcie_status_and_power[0x1];
+	u8         reserved_at_111[0x10];
 	u8         pcie_performance_group[0x1];
 };
 
@@ -9998,6 +9999,91 @@ struct mlx5_ifc_mpcnt_reg_bits {
 	u8         reserved_2[0x1f];
 
 	union mlx5_ifc_mpcnt_cntrs_grp_data_layout_bits counter_set;
+};
+
+enum {
+	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN = 0x9050,
+	MLX5_MPEIN_PWR_STATUS_INVALID = 0,
+	MLX5_MPEIN_PWR_STATUS_SUFFICIENT = 1,
+	MLX5_MPEIN_PWR_STATUS_INSUFFICIENT = 2,
+};
+
+struct mlx5_ifc_mpein_reg_bits {
+	u8         reserved_at_0[0x2];
+	u8         depth[0x6];
+	u8         pcie_index[0x8];
+	u8         node[0x8];
+	u8         reserved_at_18[0x8];
+
+	u8         capability_mask[0x20];
+
+	u8         reserved_at_40[0x8];
+	u8         link_width_enabled[0x8];
+	u8         link_speed_enabled[0x10];
+
+	u8         lane0_physical_position[0x8];
+	u8         link_width_active[0x8];
+	u8         link_speed_active[0x10];
+
+	u8         num_of_pfs[0x10];
+	u8         num_of_vfs[0x10];
+
+	u8         bdf0[0x10];
+	u8         reserved_at_b0[0x10];
+
+	u8         max_read_request_size[0x4];
+	u8         max_payload_size[0x4];
+	u8         reserved_at_c8[0x5];
+	u8         pwr_status[0x3];
+	u8         port_type[0x4];
+	u8         reserved_at_d4[0xb];
+	u8         lane_reversal[0x1];
+
+	u8         reserved_at_e0[0x14];
+	u8         pci_power[0xc];
+
+	u8         reserved_at_100[0x20];
+
+	u8         device_status[0x10];
+	u8         port_state[0x8];
+	u8         reserved_at_138[0x8];
+
+	u8         reserved_at_140[0x10];
+	u8         receiver_detect_result[0x10];
+
+	u8         reserved_at_160[0x20];
+};
+
+struct mlx5_ifc_mpein_reg_ext_bits {
+	u8         reserved_at_0[0x2];
+	u8         depth[0x6];
+	u8         pcie_index[0x8];
+	u8         node[0x8];
+	u8         reserved_at_18[0x8];
+
+	u8         reserved_at_20[0x20];
+
+	u8         reserved_at_40[0x8];
+	u8         link_width_enabled[0x8];
+	u8         link_speed_enabled[0x10];
+
+	u8         lane0_physical_position[0x8];
+	u8         link_width_active[0x8];
+	u8         link_speed_active[0x10];
+
+	u8         num_of_pfs[0x10];
+	u8         num_of_vfs[0x10];
+
+	u8         bdf0[0x10];
+	u8         reserved_at_b0[0x10];
+
+	u8         max_read_request_size[0x4];
+	u8         max_payload_size[0x4];
+	u8         reserved_at_c8[0x5];
+	u8         pwr_status[0x3];
+	u8         port_type[0x4];
+	u8         reserved_at_d4[0xb];
+	u8         lane_reversal[0x1];
 };
 
 struct mlx5_ifc_mcqi_cap_bits {


More information about the svn-src-all mailing list