git: e53c3826c0eb - stable/13 - mlx5en: Implement support for internal queues, IQ.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Tue, 08 Feb 2022 15:13:57 UTC
The branch stable/13 has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=e53c3826c0ebbc6ae4448af7ef12f6ff8a3f3f61

commit e53c3826c0ebbc6ae4448af7ef12f6ff8a3f3f61
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-08 15:08:52 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-08 15:08:52 +0000

    mlx5en: Implement support for internal queues, IQ.
    
    Internal send queues are regular send queues which are reserved for WQE
    commands towards the hardware and firmware. These queues typically carry
    resync information for ongoing TLS RX connections, and commands for
    changing schedule queues for rate limited connections.
    
    The internal queue, IQ, code is more or less a stripped down copy
    of the existing SQ managing code with exception of:
    
    1) An optional single segment memory buffer, which can be read or
       written as a whole by the hardware, may be provided.
    2) An optional completion callback for all transmit operations may
       be provided.
    3) Does not support mbufs.
    
    Sponsored by:   NVIDIA Networking
    
    (cherry picked from commit 694263572f1bdf545199fcfb0853b93eb0dd0644)
---
 sys/conf/files                      |   2 +
 sys/dev/mlx5/mlx5_en/en.h           |  57 ++++
 sys/dev/mlx5/mlx5_en/mlx5_en_iq.c   | 524 ++++++++++++++++++++++++++++++++++++
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c |  15 +-
 sys/dev/mlx5/mlx5_en/mlx5_en_rx.c   |   9 +
 sys/modules/mlx5en/Makefile         |   1 +
 6 files changed, 607 insertions(+), 1 deletion(-)

diff --git a/sys/conf/files b/sys/conf/files
index 516eeecc4549..d37d73bce98d 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4944,6 +4944,8 @@ dev/mlx5/mlx5_en/mlx5_en_flow_table.c		optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_hw_tls.c		optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
+dev/mlx5/mlx5_en/mlx5_en_iq.c			optional mlx5en pci inet inet6	\
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_rx.c			optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_rl.c			optional mlx5en pci inet inet6	\
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 9db48c435e8a..bc3bd11085a3 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -778,6 +778,52 @@ struct mlx5e_rq {
 	struct mlx5e_channel *channel;
 } __aligned(MLX5E_CACHELINE_SIZE);
 
+/* Per-request callback type of an internal queue; called with the request's "arg". */
+typedef void (mlx5e_iq_callback_t)(void *arg);
+
+/* Per-request bookkeeping of an internal queue; one entry per ring slot. */
+struct mlx5e_iq_data {
+	bus_dmamap_t dma_map;		/* DMA map for the optional memory buffer */
+	mlx5e_iq_callback_t *callback;	/* optional completion callback, NULL if none */
+	void *arg;			/* argument passed to "callback" */
+	volatile s32 *p_refcount;	/* in use refcount, if any */
+	u32 num_wqebbs;			/* number of WQEBBs occupied by the request */
+	u32 dma_sync;			/* sync operation to apply at completion, zero if no buffer is loaded */
+};
+
+/* Internal queue, IQ: a send queue for WQE commands which does not carry mbufs. */
+struct mlx5e_iq {
+	/*
+	 * persistent fields
+	 * NOTE(review): presumably these are the fields which are not
+	 * cleared by MLX5E_ZERO(..., mlx5e_iq_zero_start) - confirm.
+	 */
+	struct mtx lock;	/* protects the producer side */
+	struct mtx comp_lock;	/* held while polling and arming the CQ */
+	int	db_inhibit;	/* the doorbell is not written while non-zero */
+
+	/* data path */
+#define	mlx5e_iq_zero_start dma_tag
+	bus_dma_tag_t dma_tag;	/* DMA tag for the per-request maps */
+
+	u16 cc;	/* consumer counter */
+	u16 pc __aligned(MLX5E_CACHELINE_SIZE);	/* producer counter */
+	u16 running;	/* non-zero while new requests are accepted */
+
+	/* copy of the start of the last WQE; "d64" is zero when no doorbell is pending */
+	union {
+		u32 d32[2];
+		u64 d64;
+	} doorbell;
+
+	struct mlx5e_cq cq;
+
+	/* pointers to per request info: write@xmit, read@completion */
+	struct mlx5e_iq_data *data;
+
+	/* read only */
+	struct mlx5_wq_cyc wq;
+	void __iomem *uar_map;	/* doorbell write target */
+	u32 sqn;		/* send queue number */
+	u32 mkey_be;		/* memory key, big endian */
+
+	/* control path */
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5e_priv *priv;
+};
+
 struct mlx5e_sq_mbuf {
 	bus_dmamap_t dma_map;
 	struct mbuf *mbuf;
@@ -873,6 +919,7 @@ struct mlx5e_channel {
 	struct m_snd_tag tag;
 	struct mlx5_sq_bfreg bfreg;
 	struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC];
+	struct mlx5e_iq iq;
 	struct mlx5e_priv *priv;
 	struct completion completion;
 	int	ix;
@@ -1223,6 +1270,16 @@ int	mlx5e_update_buf_lossy(struct mlx5e_priv *priv);
 int	mlx5e_fec_update(struct mlx5e_priv *priv);
 int	mlx5e_hw_temperature_update(struct mlx5e_priv *priv);
 
+/* Internal Queue, IQ, API functions */
+void	mlx5e_iq_send_nop(struct mlx5e_iq *, u32);
+int	mlx5e_iq_open(struct mlx5e_channel *, struct mlx5e_sq_param *, struct mlx5e_cq_param *, struct mlx5e_iq *);
+void	mlx5e_iq_close(struct mlx5e_iq *);
+void	mlx5e_iq_static_init(struct mlx5e_iq *);
+void	mlx5e_iq_static_destroy(struct mlx5e_iq *);
+void	mlx5e_iq_notify_hw(struct mlx5e_iq *);
+int	mlx5e_iq_get_producer_index(struct mlx5e_iq *);
+void	mlx5e_iq_load_memory_single(struct mlx5e_iq *, u16, void *, size_t, u64 *, u32);
+
 if_snd_tag_alloc_t mlx5e_ul_snd_tag_alloc;
 if_snd_tag_modify_t mlx5e_ul_snd_tag_modify;
 if_snd_tag_query_t mlx5e_ul_snd_tag_query;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c b/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c
new file mode 100644
index 000000000000..3bc4959e046f
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c
@@ -0,0 +1,524 @@
+/*-
+ * Copyright (c) 2021 NVIDIA corporation & affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The internal queue, IQ, code is more or less a stripped down copy
+ * of the existing SQ managing code with exception of:
+ *
+ * - an optional single segment memory buffer which can be read or
+ *   written as a whole by the hardware, may be provided.
+ *
+ * - an optional completion callback for all transmit operations, may
+ *   be provided.
+ *
+ * - does not support mbufs.
+ */
+
+#include <dev/mlx5/mlx5_en/en.h>
+
+/*
+ * Process up to "budget" entries from the completion queue of the IQ.
+ *
+ * For every completion the request at the current consumer index is
+ * retired: a loaded DMA buffer is synced and unloaded, the optional
+ * callback is invoked, the optional reference count is dropped by one
+ * and the consumer counter advances by the number of WQEBBs the request
+ * occupied. The new consumer counter is published only after the CQ
+ * doorbell record has been updated.
+ *
+ * NOTE(review): the caller visible in this file holds iq->comp_lock;
+ * confirm that this is required of all callers.
+ */
+static void
+mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
+{
+	const struct mlx5_cqe64 *cqe;
+	u16 ci;
+	u16 iqcc;
+
+	/*
+	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+	iqcc = iq->cc;
+
+	while (budget-- > 0) {
+
+		cqe = mlx5e_get_cqe(&iq->cq);
+		if (!cqe)
+			break;
+
+		mlx5_cqwq_pop(&iq->cq.wq);
+
+		/* ring slot of the oldest outstanding request */
+		ci = iqcc & iq->wq.sz_m1;
+
+		if (likely(iq->data[ci].dma_sync != 0)) {
+			/* make sure data written by hardware is visible to CPU */
+			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map, iq->data[ci].dma_sync);
+			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);
+
+			/* zero marks the slot as having no buffer loaded */
+			iq->data[ci].dma_sync = 0;
+		}
+
+		/* the callback is one-shot, it is cleared once invoked */
+		if (likely(iq->data[ci].callback != NULL)) {
+			iq->data[ci].callback(iq->data[ci].arg);
+			iq->data[ci].callback = NULL;
+		}
+
+		/* drop the in use reference taken for this request, if any */
+		if (unlikely(iq->data[ci].p_refcount != NULL)) {
+			atomic_add_int(iq->data[ci].p_refcount, -1);
+			iq->data[ci].p_refcount = NULL;
+		}
+		iqcc += iq->data[ci].num_wqebbs;
+	}
+
+	mlx5_cqwq_update_db_record(&iq->cq.wq);
+
+	/* Ensure cq space is freed before enabling more cqes */
+	atomic_thread_fence_rel();
+
+	iq->cc = iqcc;
+}
+
+/*
+ * Completion handler passed to mlx5e_open_cq() for the IQ's completion
+ * queue. Retires completed requests and arms the CQ again, both under
+ * the completion lock. The event argument is not used.
+ */
+static void
+mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
+{
+	/* the CQ is embedded in the IQ, so the IQ can be derived from it */
+	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);
+
+	mtx_lock(&iq->comp_lock);
+	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
+	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
+	mtx_unlock(&iq->comp_lock);
+}
+
+/*
+ * Queue a NOP request of "ds_cnt" data segments at the current producer
+ * position and advance the producer counter past it. The request asks
+ * for a completion and has neither a callback nor a DMA buffer. The
+ * doorbell data is updated, but the doorbell itself is not written.
+ *
+ * The IQ lock must be held by the caller.
+ */
+void
+mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
+{
+	u16 slot = iq->pc & iq->wq.sz_m1;
+	struct mlx5e_tx_wqe *nop = mlx5_wq_cyc_get_wqe(&iq->wq, slot);
+	struct mlx5e_iq_data *req = &iq->data[slot];
+
+	mtx_assert(&iq->lock, MA_OWNED);
+
+	/* build the control segment from scratch */
+	memset(&nop->ctrl, 0, sizeof(nop->ctrl));
+	nop->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
+	nop->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
+	nop->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+
+	/* remember the start of the control segment for the doorbell write */
+	memcpy(iq->doorbell.d32, &nop->ctrl, sizeof(iq->doorbell.d32));
+
+	/* record the per-request state used when the request completes */
+	req->callback = NULL;
+	req->arg = NULL;
+	req->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+	req->dma_sync = 0;
+
+	iq->pc += req->num_wqebbs;
+}
+
+/*
+ * Release the per-request array of an internal queue.
+ *
+ * Requests which are still pending are retired the same way
+ * mlx5e_iq_poll() retires completed ones: a loaded DMA buffer is
+ * unloaded, a pending callback is invoked and a pending in use
+ * reference is dropped, so that the owner of the reference count is
+ * not left with a reference which is never released. Afterwards all
+ * DMA maps are destroyed and the array itself is freed.
+ */
+static void
+mlx5e_iq_free_db(struct mlx5e_iq *iq)
+{
+	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
+	int x;
+
+	for (x = 0; x != wq_sz; x++) {
+		/* a non-zero sync operation means a buffer is still loaded */
+		if (likely(iq->data[x].dma_sync != 0)) {
+			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
+			iq->data[x].dma_sync = 0;
+		}
+		if (likely(iq->data[x].callback != NULL)) {
+			iq->data[x].callback(iq->data[x].arg);
+			iq->data[x].callback = NULL;
+		}
+		/* drop the in use reference, like the completion path does */
+		if (unlikely(iq->data[x].p_refcount != NULL)) {
+			atomic_add_int(iq->data[x].p_refcount, -1);
+			iq->data[x].p_refcount = NULL;
+		}
+		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
+	}
+	free(iq->data, M_MLX5EN);
+}
+
+/*
+ * Allocate the zeroed per-request array, one entry per work queue
+ * slot, and create a DMA map for every entry.
+ *
+ * Returns zero on success. If creating a map fails, the maps created
+ * so far and the array are released and the negated error code from
+ * bus_dmamap_create() is returned.
+ */
+static int
+mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
+{
+	int nentries = mlx5_wq_cyc_get_size(&iq->wq);
+	int err;
+	int i;
+
+	iq->data = malloc_domainset(nentries * sizeof(iq->data[0]), M_MLX5EN,
+	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);
+
+	/* Create DMA descriptor maps */
+	for (i = 0; i != nentries; i++) {
+		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[i].dma_map);
+		if (err != 0)
+			goto err_destroy_maps;
+	}
+	return (0);
+
+err_destroy_maps:
+	/* entry "i" failed, so only the entries below it own a map */
+	while (i-- > 0)
+		bus_dmamap_destroy(iq->dma_tag, iq->data[i].dma_map);
+	free(iq->data, M_MLX5EN);
+	return (err);
+}
+
+/*
+ * Allocate the software resources of an internal queue: the DMA tag
+ * for single segment buffers of up to PAGE_SIZE bytes, the cyclic work
+ * queue and the per-request array.
+ *
+ * Returns zero on success. On failure everything allocated so far is
+ * released again and a non-zero error code is returned.
+ */
+static int
+mlx5e_iq_create(struct mlx5e_channel *c,
+    struct mlx5e_sq_param *param,
+    struct mlx5e_iq *iq)
+{
+	struct mlx5e_priv *priv = c->priv;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	void *sqc = param->sqc;
+	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+	int err;
+
+	/* Create DMA descriptor TAG */
+	if ((err = -bus_dma_tag_create(
+	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
+	    1,				/* any alignment */
+	    0,				/* no boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    PAGE_SIZE,			/* maxsize */
+	    1,				/* nsegments */
+	    PAGE_SIZE,			/* maxsegsize */
+	    0,				/* flags */
+	    NULL, NULL,			/* lockfunc, lockfuncarg */
+	    &iq->dma_tag)))
+		goto done;
+
+	iq->mkey_be = cpu_to_be32(priv->mr.key);
+	iq->priv = priv;
+
+	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
+	    &iq->wq, &iq->wq_ctrl);
+	if (err)
+		goto err_free_dma_tag;
+
+	/* from here on the doorbell record pointer refers to the send entry */
+	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];
+
+	err = mlx5e_iq_alloc_db(iq);
+	if (err)
+		goto err_iq_wq_destroy;
+
+	return (0);
+
+err_iq_wq_destroy:
+	mlx5_wq_destroy(&iq->wq_ctrl);
+
+err_free_dma_tag:
+	bus_dma_tag_destroy(iq->dma_tag);
+done:
+	return (err);
+}
+
+/*
+ * Release the software resources allocated by mlx5e_iq_create(), in
+ * reverse order of allocation: the per-request array with its DMA
+ * maps, the work queue and finally the DMA tag.
+ */
+static void
+mlx5e_iq_destroy(struct mlx5e_iq *iq)
+{
+	mlx5e_iq_free_db(iq);
+	mlx5_wq_destroy(&iq->wq_ctrl);
+	bus_dma_tag_destroy(iq->dma_tag);
+}
+
+/*
+ * Create the send queue object for the IQ, in the reset state.
+ *
+ * The send queue context is copied from "param" and then bound to the
+ * given TIS number, to the IQ's completion queue and to the UAR page
+ * of "bfreg". The resulting send queue number is stored in iq->sqn.
+ *
+ * Returns -ENOMEM if the command buffer cannot be allocated, else the
+ * return value of mlx5_core_create_sq().
+ */
+static int
+mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
+    const struct mlx5_sq_bfreg *bfreg, int tis_num)
+{
+	void *in;
+	void *sqc;
+	void *wq;
+	int inlen;
+	int err;
+	u8 ts_format;
+
+	/* the command carries one 64-bit address per work queue buffer page */
+	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+	    sizeof(u64) * iq->wq_ctrl.buf.npages;
+	in = mlx5_vzalloc(inlen);
+	if (in == NULL)
+		return (-ENOMEM);
+
+	iq->uar_map = bfreg->map;
+
+	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
+	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+	wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	/* start from the caller's send queue context, then override fields */
+	memcpy(sqc, param->sqc, sizeof(param->sqc));
+
+	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
+	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
+	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+	MLX5_SET(sqc, sqc, ts_format, ts_format);
+	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+	MLX5_SET(sqc, sqc, allow_swp, 1);
+
+	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+	MLX5_SET(wq, wq, uar_page, bfreg->index);
+	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
+	    PAGE_SHIFT);
+	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);
+
+	mlx5_fill_page_array(&iq->wq_ctrl.buf,
+	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
+
+	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);
+
+	kvfree(in);
+
+	return (err);
+}
+
+/*
+ * Request a state change of the IQ's send queue from "curr_state" to
+ * "next_state".
+ *
+ * Returns -ENOMEM if the command buffer cannot be allocated, else the
+ * return value of mlx5_core_modify_sq().
+ */
+static int
+mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
+{
+	void *in;
+	void *sqc;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+	in = mlx5_vzalloc(inlen);
+	if (in == NULL)
+		return (-ENOMEM);
+
+	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+	/* the command names the current state and carries the new one */
+	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
+	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+	MLX5_SET(sqc, sqc, state, next_state);
+
+	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);
+
+	kvfree(in);
+
+	return (err);
+}
+
+/* Destroy the send queue object created by mlx5e_iq_enable(). */
+static void
+mlx5e_iq_disable(struct mlx5e_iq *iq)
+{
+	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
+}
+
+/*
+ * Open the internal queue of channel "c".
+ *
+ * Opens the completion queue, allocates the software resources,
+ * creates the send queue on the first TIS of the interface and moves
+ * it from the reset state to the ready state. After that the IQ is
+ * marked as running.
+ *
+ * Returns zero on success. On failure the steps already taken are
+ * undone in reverse order and a non-zero error code is returned.
+ */
+int
+mlx5e_iq_open(struct mlx5e_channel *c,
+    struct mlx5e_sq_param *sq_param,
+    struct mlx5e_cq_param *cq_param,
+    struct mlx5e_iq *iq)
+{
+	int err;
+
+	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
+	    &mlx5e_iq_completion, c->ix);
+	if (err)
+		return (err);
+
+	err = mlx5e_iq_create(c, sq_param, iq);
+	if (err)
+		goto err_close_cq;
+
+	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
+	if (err)
+		goto err_destroy_sq;
+
+	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
+	if (err)
+		goto err_disable_sq;
+
+	/* from now on mlx5e_iq_get_producer_index() hands out slots */
+	WRITE_ONCE(iq->running, 1);
+
+	return (0);
+
+err_disable_sq:
+	mlx5e_iq_disable(iq);
+err_destroy_sq:
+	mlx5e_iq_destroy(iq);
+err_close_cq:
+	mlx5e_close_cq(&iq->cq);
+
+	return (err);
+}
+
+/*
+ * Stop the IQ and wait for its outstanding requests.
+ *
+ * First new requests are refused, then the function waits for the
+ * pending ones to complete, polling the completion handler once per
+ * sleep. After that the send queue is moved to the error state and the
+ * function waits once more. Both waits are given up when the device is
+ * in internal error state or the PCI channel is offline, so requests
+ * may still be outstanding on return.
+ */
+static void
+mlx5e_iq_drain(struct mlx5e_iq *iq)
+{
+	struct mlx5_core_dev *mdev = iq->priv->mdev;
+
+	/*
+	 * Check if already stopped.
+	 *
+	 * NOTE: Serialization of this function is managed by the
+	 * caller ensuring the priv's state lock is locked or in case
+	 * of rate limit support, a single thread manages drain and
+	 * resume of SQs. The "running" variable can therefore safely
+	 * be read without any locks.
+	 */
+	if (READ_ONCE(iq->running) == 0)
+		return;
+
+	/* don't accept more requests into the IQ */
+	WRITE_ONCE(iq->running, 0);
+
+	/* wait till IQ is empty, link is down or the device is unusable */
+	mtx_lock(&iq->lock);
+	while (iq->cc != iq->pc &&
+	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
+	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+	    pci_channel_offline(mdev->pdev) == 0) {
+		/* sleep and poll for completions without holding the IQ lock */
+		mtx_unlock(&iq->lock);
+		msleep(1);
+		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
+		mtx_lock(&iq->lock);
+	}
+	mtx_unlock(&iq->lock);
+
+	/* error out remaining requests */
+	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+
+	/* wait till IQ is empty or the device is unusable */
+	mtx_lock(&iq->lock);
+	while (iq->cc != iq->pc &&
+	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+	    pci_channel_offline(mdev->pdev) == 0) {
+		mtx_unlock(&iq->lock);
+		msleep(1);
+		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
+		mtx_lock(&iq->lock);
+	}
+	mtx_unlock(&iq->lock);
+}
+
+/*
+ * Close an internal queue opened by mlx5e_iq_open(): drain it, destroy
+ * the send queue object, release the software resources and close the
+ * completion queue.
+ */
+void
+mlx5e_iq_close(struct mlx5e_iq *iq)
+{
+	mlx5e_iq_drain(iq);
+	mlx5e_iq_disable(iq);
+	mlx5e_iq_destroy(iq);
+	mlx5e_close_cq(&iq->cq);
+}
+
+/* Initialize the two mutexes of the IQ, the IQ lock and the completion lock. */
+void
+mlx5e_iq_static_init(struct mlx5e_iq *iq)
+{
+	mtx_init(&iq->lock, "mlx5iq",
+	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
+	mtx_init(&iq->comp_lock, "mlx5iq_comp",
+	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
+}
+
+/* Destroy the mutexes set up by mlx5e_iq_static_init(). */
+void
+mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
+{
+	mtx_destroy(&iq->lock);
+	mtx_destroy(&iq->comp_lock);
+}
+
+/*
+ * Tell the hardware about newly queued requests.
+ *
+ * Nothing is done while doorbell writes are inhibited or when there is
+ * no doorbell data pending. Otherwise the doorbell record is updated
+ * with the producer counter, the doorbell is written and the pending
+ * doorbell data is cleared.
+ *
+ * The IQ lock must be held by the caller.
+ */
+void
+mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
+{
+	mtx_assert(&iq->lock, MA_OWNED);
+
+	/* Check if we need to write the doorbell */
+	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
+		return;
+
+	/* Ensure wqe is visible to device before updating doorbell record */
+	wmb();
+
+	*iq->wq.db = cpu_to_be32(iq->pc);
+
+	/*
+	 * Ensure the doorbell record is visible to device before ringing
+	 * the doorbell:
+	 */
+	wmb();
+
+	mlx5_write64(iq->doorbell.d32, iq->uar_map,
+	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
+
+	/* zero means that there is nothing left to announce */
+	iq->doorbell.d64 = 0;
+}
+
+/*
+ * Check if "n" more WQEBBs fit into the work queue. An empty queue,
+ * where the consumer and producer counters are equal, always has room.
+ */
+static inline bool
+mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
+{
+	u16 consumer = iq->cc;
+	u16 producer = iq->pc;
+
+	if (consumer == producer)
+		return (true);
+
+	/* distance from the producer to the consumer, modulo the ring size */
+	return ((iq->wq.sz_m1 & (consumer - producer)) >= n);
+}
+
+/*
+ * Get the ring index at which the next request can be built.
+ *
+ * Returns -1 when the IQ is not running or when there is no room for
+ * 2 * MLX5_SEND_WQE_MAX_WQEBBS WQEBBs. When fewer than
+ * MLX5_SEND_WQE_MAX_WQEBBS entries remain before the end of the ring,
+ * the remainder is first filled with a single NOP request, so that a
+ * request built at the returned index does not wrap around.
+ *
+ * The IQ lock must be held by the caller.
+ */
+int
+mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
+{
+	u16 pi;
+
+	mtx_assert(&iq->lock, MA_OWNED);
+
+	if (unlikely(iq->running == 0))
+		return (-1);
+	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
+		return (-1);
+
+	/* Align IQ edge with NOPs to avoid WQE wrap around */
+	/* "pi" is the number of entries after the current one, up to the ring end */
+	pi = ((~iq->pc) & iq->wq.sz_m1);
+	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
+		/* Send one multi NOP message instead of many */
+		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
+		/* check again, now that the producer counter has moved */
+		pi = ((~iq->pc) & iq->wq.sz_m1);
+		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
+			return (-1);
+	}
+	return (iq->pc & iq->wq.sz_m1);
+}
+
+/*
+ * Callback for bus_dmamap_load(): store the bus address of the single
+ * mapped segment in the u64 pointed to by "arg". Panics if the load
+ * failed or did not yield exactly one segment.
+ */
+static void
+mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
+    int nseg, int error)
+{
+	u64 *pdma_address = arg;
+
+	if (unlikely(error || nseg != 1))
+		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);
+
+	*pdma_address = segs[0].ds_addr;
+}
+
+/*
+ * A "dma_sync" value of zero marks a request without a loaded DMA
+ * buffer, so the sync operations stored there must be non-zero.
+ */
+CTASSERT(BUS_DMASYNC_POSTREAD != 0);
+CTASSERT(BUS_DMASYNC_POSTWRITE != 0);
+
+/*
+ * Map a single segment memory buffer for DMA on behalf of the request
+ * at ring index "pi".
+ *
+ * iq:           internal queue owning the request
+ * pi:           ring index of the request, indexes iq->data[]
+ * buffer, size: memory to map
+ * pdma_address: receives the bus address of the mapped segment
+ * dma_sync:     BUS_DMASYNC_PREREAD or BUS_DMASYNC_PREWRITE; the
+ *               matching POST operation is stored in the request, to be
+ *               applied when the request completes
+ *
+ * Panics if the buffer cannot be loaded or if "dma_sync" is invalid.
+ */
+void
+mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
+    u64 *pdma_address, u32 dma_sync)
+{
+	int error;
+
+	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
+	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
+	if (unlikely(error))
+		panic("mlx5e_iq_load_memory_single: error=%d buffer=%p size=%zu", error, buffer, size);
+
+	/* remember which sync operation has to follow at completion */
+	switch (dma_sync) {
+	case BUS_DMASYNC_PREREAD:
+		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
+		break;
+	case BUS_DMASYNC_PREWRITE:
+		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
+		break;
+	default:
+		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%u)", dma_sync);
+	}
+
+	/* make sure data in buffer is visible to hardware */
+	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 10fd591e0c6d..6a7e8ce606af 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -2197,6 +2197,8 @@ mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix)
 
 		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
 	}
+
+	mlx5e_iq_static_init(&c->iq);
 }
 
 static void
@@ -2230,6 +2232,8 @@ mlx5e_chan_static_destroy(struct mlx5e_channel *c)
 		mtx_destroy(&c->sq[tc].lock);
 		mtx_destroy(&c->sq[tc].comp_lock);
 	}
+
+	mlx5e_iq_static_destroy(&c->iq);
 }
 
 static int
@@ -2244,6 +2248,7 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
 	MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start);
 	for (i = 0; i != priv->num_tc; i++)
 		MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start);
+	MLX5E_ZERO(&c->iq, mlx5e_iq_zero_start);
 
 	/* open transmit completion queue */
 	err = mlx5e_open_tx_cqs(c, cparam);
@@ -2260,10 +2265,14 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
 	if (err)
 		goto err_close_rx_cq;
 
-	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
+	err = mlx5e_iq_open(c, &cparam->sq, &cparam->tx_cq, &c->iq);
 	if (err)
 		goto err_close_sqs;
 
+	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
+	if (err)
+		goto err_close_iq;
+
 	/* poll receive queue initially */
 	NET_EPOCH_ENTER(et);
 	c->rq.cq.mcq.comp(&c->rq.cq.mcq, NULL);
@@ -2271,6 +2280,9 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
 
 	return (0);
 
+err_close_iq:
+	mlx5e_iq_close(&c->iq);
+
 err_close_sqs:
 	mlx5e_close_sqs_wait(c);
 
@@ -2294,6 +2306,7 @@ static void
 mlx5e_close_channel_wait(struct mlx5e_channel *c)
 {
 	mlx5e_close_rq_wait(&c->rq);
+	mlx5e_iq_close(&c->iq);
 	mlx5e_close_sqs_wait(c);
 	mlx5e_close_tx_cqs(c);
 }
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index ec6b027b324e..522e3f09df2c 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -591,6 +591,10 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
 		mtx_unlock(&c->sq[j].lock);
 	}
 
+	mtx_lock(&c->iq.lock);
+	c->iq.db_inhibit++;
+	mtx_unlock(&c->iq.lock);
+
 	mtx_lock(&rq->mtx);
 
 	/*
@@ -621,4 +625,9 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
 		mlx5e_tx_notify_hw(c->sq + j, true);
 		mtx_unlock(&c->sq[j].lock);
 	}
+
+	mtx_lock(&c->iq.lock);
+	c->iq.db_inhibit--;
+	mlx5e_iq_notify_hw(&c->iq);
+	mtx_unlock(&c->iq.lock);
 }
diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile
index 8ecf2c77f117..4c192a860f4e 100644
--- a/sys/modules/mlx5en/Makefile
+++ b/sys/modules/mlx5en/Makefile
@@ -9,6 +9,7 @@ mlx5_en_main.c \
 mlx5_en_tx.c \
 mlx5_en_flow_table.c \
 mlx5_en_hw_tls.c \
+mlx5_en_iq.c \
 mlx5_en_rx.c \
 mlx5_en_rl.c \
 mlx5_en_txrx.c \