git: 84d7b8e75f25 - main - mlx5en: Implement TLS RX support.

From: Hans Petter Selasky <hselasky@FreeBSD.org>
Date: Tue, 01 Feb 2022 15:24:39 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=84d7b8e75f251ee5b33a92c3cd268396809b015b

commit 84d7b8e75f251ee5b33a92c3cd268396809b015b
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-01 15:20:16 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-01 15:21:17 +0000

    mlx5en: Implement TLS RX support.
    
    TLS RX support is modeled after TLS TX support. The basic structures and
    layouts are almost identical, except that the send tag created filters RX
    traffic rather than TX traffic.
    
    The TLS RX tag keeps track of past TLS records up to a certain limit,
    approximately 1 Gbyte of TCP data. TLS records of the same length are
    joined into a single database record.
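    
    A minimal sketch of that join idea in plain C (hypothetical names and
    simplified bookkeeping; the driver's actual state lives in the
    tcp_resync_len[] and tcp_resync_num[] arrays of struct
    mlx5e_tls_rx_tag, see en_hw_tls_rx.h below):
    
        #include <stdint.h>
    
        #define RESYNC_MAX 32              /* mirrors MLX5E_TLS_RX_RESYNC_MAX */
        #define NUM_MAX (1U << 11)         /* mirrors MLX5E_TLS_RX_NUM_MAX */
    
        struct rcd_db {
                uint32_t len[RESYNC_MAX];  /* TLS record length in TCP bytes */
                uint32_t num[RESYNC_MAX];  /* number of back-to-back records */
                uint16_t pc;               /* producer index */
        };
    
        /* Join back-to-back TLS records of equal length into one entry. */
        static void
        rcd_db_add(struct rcd_db *db, uint32_t len)
        {
                uint16_t last = (uint16_t)(db->pc - 1) % RESYNC_MAX;
    
                if (db->pc != 0 && db->len[last] == len &&
                    db->num[last] < NUM_MAX) {
                        db->num[last]++;         /* same length: join */
                } else {
                        last = db->pc++ % RESYNC_MAX;
                        db->len[last] = len;     /* start a new entry */
                        db->num[last] = 1;
                }
        }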
    
    The HW is regularly queried for TLS RX progress information. The TCP
    sequence number obtained from the HW is then matched against the database
    of TLS TCP sequence numbers and record lengths. If a match is found, a
    static params WQE is queued on the IQ and the hardware should immediately
    resume decrypting TLS data until the next non-sequential TCP packet
    arrives.
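    
    A hedged sketch of that matching step (hypothetical names, building on
    the sketch above; the driver's real lookup is
    mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd()): walk the database from
    the stored starting point and check whether the sequence number reported
    by the HW lands exactly on a TLS record boundary.
    
        #include <stdbool.h>
    
        /* RESYNC_MAX and struct rcd_db as in the previous sketch. */
        static bool
        rcd_db_match(const struct rcd_db *db, uint32_t start_sn,
            uint64_t start_rcd, uint32_t hw_sn, uint64_t *p_rcd)
        {
                uint32_t sn = start_sn;    /* TCP sn of the oldest record */
                uint64_t rcd = start_rcd;  /* its TLS record number */
                uint32_t i, j;
    
                for (i = 0; i != db->pc && i != RESYNC_MAX; i++) {
                        for (j = 0; j != db->num[i]; j++) {
                                if (sn == hw_sn) {
                                        *p_rcd = rcd;  /* resume point */
                                        return (true);
                                }
                                sn += db->len[i];      /* next record */
                                rcd++;
                        }
                }
                return (false);  /* not a record boundary: no resync */
        }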
    
    Offloading TLS RX data is supported for untagged, prio-tagged, and
    regular VLAN traffic.
    
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/conf/files                           |    2 +
 sys/dev/mlx5/device.h                    |   20 +
 sys/dev/mlx5/mlx5_en/en.h                |   10 +-
 sys/dev/mlx5/mlx5_en/en_hw_tls_rx.h      |  149 +++++
 sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c | 1008 ++++++++++++++++++++++++++++++
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c      |   26 +
 sys/dev/mlx5/mlx5_en/mlx5_en_rx.c        |   13 +
 sys/dev/mlx5/mlx5_ifc.h                  |    2 +-
 sys/modules/mlx5en/Makefile              |    1 +
 9 files changed, 1228 insertions(+), 3 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 5f452f851ea6..0278d5fa4083 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4929,6 +4929,8 @@ dev/mlx5/mlx5_en/mlx5_en_flow_table.c		optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_hw_tls.c		optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
+dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c		optional mlx5en pci inet inet6	\
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_iq.c			optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_rx.c			optional mlx5en pci inet inet6	\
diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h
index f183ca74c2d7..e2868c62337f 100644
--- a/sys/dev/mlx5/device.h
+++ b/sys/dev/mlx5/device.h
@@ -408,6 +408,14 @@ enum {
 	MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS = 0x2,
 };
 
+struct mlx5_wqe_tls_static_params_seg {
+	u8     ctx[MLX5_ST_SZ_BYTES(tls_static_params)];
+};
+
+struct mlx5_wqe_tls_progress_params_seg {
+	u8     ctx[MLX5_ST_SZ_BYTES(tls_progress_params)];
+} __aligned(64);
+
 enum {
 	MLX5_SET_PORT_RESET_QKEY	= 0,
 	MLX5_SET_PORT_GUID0		= 16,
@@ -750,6 +758,11 @@ static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 	return cqe->tls_outer_l3_tunneled & 0x1;
 }
 
+static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
+{
+	return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
+}
+
 enum {
 	CQE_L4_HDR_TYPE_NONE			= 0x0,
 	CQE_L4_HDR_TYPE_TCP_NO_ACK		= 0x1,
@@ -794,6 +807,13 @@ enum {
 	CQE_L4_OK	= 1 << 2,
 };
 
+enum {
+	CQE_TLS_OFFLOAD_NOT_DECRYPTED		= 0x0,
+	CQE_TLS_OFFLOAD_DECRYPTED		= 0x1,
+	CQE_TLS_OFFLOAD_RESYNC			= 0x2,
+	CQE_TLS_OFFLOAD_ERROR			= 0x3,
+};
+
 struct mlx5_sig_err_cqe {
 	u8		rsvd0[16];
 	__be32		expected_trans_sig;
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 50215defdf60..1c8a53b1ba4d 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -207,7 +207,9 @@ typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *, struct mlx5_eqe *);
   m(+1, u64, tx_defragged, "tx_defragged", "Transmit queue defragged") \
   m(+1, u64, rx_wqe_err, "rx_wqe_err", "Receive WQE errors") \
   m(+1, u64, tx_jumbo_packets, "tx_jumbo_packets", "TX packets greater than 1518 octets") \
-  m(+1, u64, rx_steer_missed_packets, "rx_steer_missed_packets", "RX packets dropped by steering rule(s)")
+  m(+1, u64, rx_steer_missed_packets, "rx_steer_missed_packets", "RX packets dropped by steering rule(s)") \
+  m(+1, u64, rx_decrypted_ok_packets, "rx_decrypted_ok_packets", "RX packets successfully decrypted by steering rule(s)") \
+  m(+1, u64, rx_decrypted_error_packets, "rx_decrypted_error_packets", "RX packets not decrypted by steering rule(s)")
 
 #define	MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT))
 
@@ -608,7 +610,9 @@ struct mlx5e_port_stats_debug {
   m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes")	\
   m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO")	\
   m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO")	\
-  m(+1, u64, wqe_err, "wqe_err", "Received packets")
+  m(+1, u64, wqe_err, "wqe_err", "Received packets") \
+  m(+1, u64, decrypted_ok_packets, "decrypted_ok_packets", "Received packets successfully decrypted by steering rule(s)") \
+  m(+1, u64, decrypted_error_packets, "decrypted_error_packets", "Received packets not decrypted by steering rule(s)")
 
 #define	MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT))
 
@@ -1050,6 +1054,7 @@ struct mlx5e_xmit_args {
 
 #include <dev/mlx5/mlx5_en/en_rl.h>
 #include <dev/mlx5/mlx5_en/en_hw_tls.h>
+#include <dev/mlx5/mlx5_en/en_hw_tls_rx.h>
 
 #define	MLX5E_TSTMP_PREC 10
 
@@ -1131,6 +1136,7 @@ struct mlx5e_priv {
 	struct mlx5e_rl_priv_data rl;
 
 	struct mlx5e_tls tls;
+	struct mlx5e_tls_rx tls_rx;
 
 	struct callout tstmp_clbr;
 	int	clbr_done;
diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls_rx.h b/sys/dev/mlx5/mlx5_en/en_hw_tls_rx.h
new file mode 100644
index 000000000000..6954b97b827b
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/en_hw_tls_rx.h
@@ -0,0 +1,149 @@
+/*-
+ * Copyright (c) 2021 NVIDIA corporation & affiliates.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MLX5_TLS_RX_H_
+#define	_MLX5_TLS_RX_H_
+
+#include <linux/completion.h>
+
+#define	MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE 128
+
+#define	MLX5E_TLS_RX_RESYNC_MAX 32	/* units */
+#define	MLX5E_TLS_RX_NUM_MAX (1U << 11)	/* packets */
+
+#define	MLX5E_TLS_RX_TAG_LOCK(tag)	mtx_lock(&(tag)->mtx)
+#define	MLX5E_TLS_RX_TAG_UNLOCK(tag)	mtx_unlock(&(tag)->mtx)
+
+#define	MLX5E_TLS_RX_STAT_INC(tag, field, num) \
+	counter_u64_add((tag)->tls_rx->stats.field, num)
+
+#if ((MLX5E_TLS_RX_RESYNC_MAX * MLX5E_TLS_RX_NUM_MAX) << 14) > (1U << 30)
+#error "Please lower the limits of the TLS record length database."
+#endif
+
+enum {
+	MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+	MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+	MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+	MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+	MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1,
+	MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2,
+};
+
+struct mlx5e_tls_rx;
+struct mlx5e_tls_rx_tag {
+	struct m_snd_tag tag;
+	volatile s32 refs;	/* number of pending mbufs */
+	uint32_t tirn;		/* HW TIR context number */
+	uint32_t dek_index;	/* HW TLS context number */
+	struct mlx5e_tls_rx *tls_rx; /* parent pointer */
+	struct mlx5_flow_rule *flow_rule;
+	struct mtx mtx;
+	struct completion progress_complete;
+	uint32_t state;	/* see MLX5E_TLS_RX_ST_XXX */
+#define	MLX5E_TLS_RX_ST_INIT 0
+#define	MLX5E_TLS_RX_ST_SETUP 1
+#define	MLX5E_TLS_RX_ST_READY 2
+#define	MLX5E_TLS_RX_ST_FREED 3
+
+	/*
+	 * The following fields are used to store the TCP starting
+	 * points of past TLS records. When TLS records of the same
+	 * length are back to back, tcp_resync_num[] is incremented
+	 * instead of creating new entries. This way up to
+	 * "MLX5E_TLS_RX_RESYNC_MAX" * "MLX5E_TLS_RX_NUM_MAX" * 16
+	 * KBytes, around 1 GByte worth of TCP data, may be remembered
+	 * in the best case. The amount of history should not exceed
+	 * 2 GBytes of TCP data, because then the TCP sequence numbers
+	 * may wrap around.
+	 *
+	 * This information is used to tell if a given TCP sequence
+	 * number is a valid TLS record or not.
+	 */
+	uint64_t rcd_resync_start;	/* starting TLS record number */
+	uint32_t tcp_resync_start;	/* starting TCP sequence number */
+	uint32_t tcp_resync_next;	/* next expected TCP sequence number */
+	uint32_t tcp_resync_len[MLX5E_TLS_RX_RESYNC_MAX];
+	uint32_t tcp_resync_num[MLX5E_TLS_RX_RESYNC_MAX];
+	uint16_t tcp_resync_pc;		/* producer counter for arrays above */
+	uint16_t tcp_resync_cc;		/* consumer counter for arrays above */
+
+	struct work_struct work;
+
+	uint32_t flowid;
+	uint32_t flowtype;
+	uint32_t dek_index_ok:1;
+	uint32_t tcp_resync_active:1;
+	uint32_t tcp_resync_pending:1;
+
+	/* parameters needed */
+	uint8_t crypto_params[128] __aligned(4);
+	uint8_t rx_progress[MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE * 2];
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+static inline void *
+mlx5e_tls_rx_get_progress_buffer(struct mlx5e_tls_rx_tag *ptag)
+{
+	/* return properly aligned RX buffer */
+	return (ptag->rx_progress +
+	    ((-(uintptr_t)ptag->rx_progress) &
+	    (MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE - 1)));
+}
+
+#define	MLX5E_TLS_RX_STATS(m) \
+  m(+1, u64, rx_resync_ok, "rx_resync_ok", "Successful resync requests")\
+  m(+1, u64, rx_resync_err, "rx_resync_err", "Failed resync requests")\
+  m(+1, u64, rx_error, "rx_error", "Other errors")
+
+#define	MLX5E_TLS_RX_STATS_NUM (0 MLX5E_TLS_RX_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_tls_rx_stats {
+	struct	sysctl_ctx_list ctx;
+	counter_u64_t	arg[0];
+	MLX5E_TLS_RX_STATS(MLX5E_STATS_COUNTER)
+};
+
+struct mlx5e_tls_rx {
+	struct sysctl_ctx_list ctx;
+	struct mlx5e_tls_rx_stats stats;
+	struct workqueue_struct *wq;
+	uma_zone_t zone;
+	uint32_t max_resources;		/* max number of resources */
+	volatile uint32_t num_resources;	/* current number of resources */
+	int init;			/* set when ready */
+	char zname[32];
+};
+
+int mlx5e_tls_rx_init(struct mlx5e_priv *);
+void mlx5e_tls_rx_cleanup(struct mlx5e_priv *);
+
+if_snd_tag_alloc_t mlx5e_tls_rx_snd_tag_alloc;
+
+#endif		/* _MLX5_TLS_RX_H_ */
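
An aside on the pointer-alignment trick used by
mlx5e_tls_rx_get_progress_buffer() above: for any power-of-two "n",
((-p) & (n - 1)) is the distance from "p" up to the next multiple of
"n", so adding it rounds the pointer up to an n-byte boundary. A
standalone illustration (hypothetical names, plain C):

	#include <stdint.h>
	#include <stdio.h>

	/* Round "p" up to the next "n"-byte boundary (power-of-two "n"). */
	static uintptr_t
	round_up_pow2(uintptr_t p, uintptr_t n)
	{
		/* (-p) & (n - 1) == distance to the next multiple of n */
		return (p + ((-p) & (n - 1)));
	}

	int
	main(void)
	{
		/* 0x1005 rounded up to a 128-byte boundary is 0x1080 */
		printf("%#lx\n", (unsigned long)round_up_pow2(0x1005, 128));
		return (0);
	}
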
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c
new file mode 100644
index 000000000000..5ac60c51a1db
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c
@@ -0,0 +1,1008 @@
+/*-
+ * Copyright (c) 2021 NVIDIA corporation & affiliates.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_kern_tls.h"
+#include "opt_rss.h"
+#include "opt_ratelimit.h"
+
+#include <dev/mlx5/mlx5_en/en.h>
+
+#include <dev/mlx5/tls.h>
+
+#include <dev/mlx5/fs.h>
+#include <dev/mlx5/mlx5_core/fs_tcp.h>
+
+#include <sys/ktls.h>
+#include <opencrypto/cryptodev.h>
+
+#ifdef KERN_TLS
+
+static if_snd_tag_free_t mlx5e_tls_rx_snd_tag_free;
+static if_snd_tag_modify_t mlx5e_tls_rx_snd_tag_modify;
+
+static const struct if_snd_tag_sw mlx5e_tls_rx_snd_tag_sw = {
+	.snd_tag_modify = mlx5e_tls_rx_snd_tag_modify,
+	.snd_tag_free = mlx5e_tls_rx_snd_tag_free,
+	.type = IF_SND_TAG_TYPE_TLS_RX
+};
+
+MALLOC_DEFINE(M_MLX5E_TLS_RX, "MLX5E_TLS_RX", "MLX5 ethernet HW TLS RX");
+
+/* software TLS RX context */
+struct mlx5_ifc_sw_tls_rx_cntx_bits {
+	struct mlx5_ifc_tls_static_params_bits param;
+	struct mlx5_ifc_tls_progress_params_bits progress;
+	struct {
+		uint8_t key_data[8][0x20];
+		uint8_t key_len[0x20];
+	} key;
+};
+
+CTASSERT(MLX5_ST_SZ_BYTES(sw_tls_rx_cntx) <= sizeof(((struct mlx5e_tls_rx_tag *)NULL)->crypto_params));
+CTASSERT(MLX5_ST_SZ_BYTES(mkc) == sizeof(((struct mlx5e_tx_umr_wqe *)NULL)->mkc));
+
+static const char *mlx5e_tls_rx_stats_desc[] = {
+	MLX5E_TLS_RX_STATS(MLX5E_STATS_DESC)
+};
+
+static void mlx5e_tls_rx_work(struct work_struct *);
+static bool mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd(struct mlx5e_tls_rx_tag *,
+    uint32_t, uint32_t *, uint64_t *);
+
+CTASSERT((MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param) % 16) == 0);
+
+static uint32_t
+mlx5e_tls_rx_get_ch(struct mlx5e_priv *priv, uint32_t flowid, uint32_t flowtype)
+{
+	u32 ch;
+#ifdef RSS
+	u32 temp;
+#endif
+
+	/* keep this code synced with mlx5e_select_queue() */
+	ch = priv->params.num_channels;
+#ifdef RSS
+	if (rss_hash2bucket(flowid, flowtype, &temp) == 0)
+		ch = temp % ch;
+	else
+#endif
+		ch = (flowid % 128) % ch;
+	return (ch);
+}
+
+/*
+ * This function gets a pointer to an internal queue, IQ, based on the
+ * provided "flowid" and "flowtype". The IQ returned may in some rare
+ * cases not be activated or running, but this is all handled by the
+ * "mlx5e_iq_get_producer_index()" function.
+ *
+ * The idea behind this function is to spread the IQ traffic as much
+ * as possible and to avoid congestion on the same IQ when processing
+ * RX traffic.
+ */
+static struct mlx5e_iq *
+mlx5e_tls_rx_get_iq(struct mlx5e_priv *priv, uint32_t flowid, uint32_t flowtype)
+{
+	/*
+	 * NOTE: The channels array is only freed at detach
+	 * and it is safe to return a pointer to the IQ
+	 * inside the channels structure as long as we
+	 * reference the priv.
+	 */
+	return (&priv->channel[mlx5e_tls_rx_get_ch(priv, flowid, flowtype)].iq);
+}
+
+/*
+ * This function sends the so-called TLS RX static parameters to the
+ * hardware. These parameters are temporarily stored in the
+ * "crypto_params" field of the TLS RX tag.  Most importantly this
+ * function sets the TCP sequence number (32-bit) and TLS record
+ * number (64-bit) where the decryption can resume.
+ *
+ * Zero is returned upon success, else some error happened.
+ */
+static int
+mlx5e_tls_rx_send_static_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag)
+{
+	const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_umr_wqe) +
+	    MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param), MLX5_SEND_WQE_DS);
+	struct mlx5e_tx_umr_wqe *wqe;
+	int pi;
+
+	mtx_lock(&iq->lock);
+	pi = mlx5e_iq_get_producer_index(iq);
+	if (pi < 0) {
+		mtx_unlock(&iq->lock);
+		return (-ENOMEM);
+	}
+	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
+	    MLX5_OPCODE_UMR | (MLX5_OPCODE_MOD_UMR_TLS_TIR_STATIC_PARAMS << 24));
+	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
+	wqe->ctrl.imm = cpu_to_be32(ptag->tirn << 8);
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+	/* fill out UMR control segment */
+	wqe->umr.flags = 0x80;	/* inline data */
+	wqe->umr.bsf_octowords =
+	    cpu_to_be16(MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param) / 16);
+
+	/* copy in the static crypto parameters */
+	memcpy(wqe + 1, MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, param),
+	    MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param));
+
+	/* copy data for doorbell */
+	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
+
+	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+
+	iq->data[pi].p_refcount = &ptag->refs;
+	atomic_add_int(&ptag->refs, 1);
+	iq->pc += iq->data[pi].num_wqebbs;
+
+	mlx5e_iq_notify_hw(iq);
+
+	mtx_unlock(&iq->lock);
+
+	return (0);	/* success */
+}
+
+static void
+mlx5e_tls_rx_send_progress_parameters_cb(void *arg)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+
+	ptag = (struct mlx5e_tls_rx_tag *)arg;
+
+	complete(&ptag->progress_complete);
+}
+
+CTASSERT(MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, progress) ==
+    sizeof(((struct mlx5e_tx_psv_wqe *)NULL)->psv));
+
+/*
+ * This function resets the state of the TIR context to start
+ * searching for a valid TLS header and is used only when allocating
+ * the TLS RX tag.
+ *
+ * Zero is returned upon success, else some error happened.
+ */
+static int
+mlx5e_tls_rx_send_progress_parameters_sync(struct mlx5e_iq *iq,
+    struct mlx5e_tls_rx_tag *ptag)
+{
+	const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_psv_wqe),
+	    MLX5_SEND_WQE_DS);
+	struct mlx5e_priv *priv;
+	struct mlx5e_tx_psv_wqe *wqe;
+	int pi;
+
+	mtx_lock(&iq->lock);
+	pi = mlx5e_iq_get_producer_index(iq);
+	if (pi < 0) {
+		mtx_unlock(&iq->lock);
+		return (-ENOMEM);
+	}
+	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
+	    MLX5_OPCODE_SET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS << 24));
+	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+
+	/* copy in the PSV control segment */
+	memcpy(&wqe->psv, MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, progress),
+	    sizeof(wqe->psv));
+
+	/* copy data for doorbell */
+	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
+
+	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+	iq->data[pi].callback = &mlx5e_tls_rx_send_progress_parameters_cb;
+	iq->data[pi].arg = ptag;
+	iq->data[pi].p_refcount = &ptag->refs;
+	atomic_add_int(&ptag->refs, 1);
+	iq->pc += iq->data[pi].num_wqebbs;
+
+	init_completion(&ptag->progress_complete);
+
+	mlx5e_iq_notify_hw(iq);
+
+	mtx_unlock(&iq->lock);
+
+	while (1) {
+		if (wait_for_completion_timeout(&ptag->progress_complete, hz) != 0)
+			break;
+		priv = container_of(iq, struct mlx5e_channel, iq)->priv;
+		if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+		    pci_channel_offline(priv->mdev->pdev) != 0)
+			return (-EWOULDBLOCK);
+	}
+
+	return (0);	/* success */
+}
+
+CTASSERT(MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE >= MLX5_ST_SZ_BYTES(tls_progress_params));
+CTASSERT(MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE <= PAGE_SIZE);
+
+struct mlx5e_get_tls_progress_params_wqe {
+	struct mlx5_wqe_ctrl_seg ctrl;
+	struct mlx5_seg_get_psv	 psv;
+};
+
+static void
+mlx5e_tls_rx_receive_progress_parameters_cb(void *arg)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+	struct mlx5e_iq *iq;
+	uint32_t tcp_curr_sn_he;
+	uint32_t tcp_next_sn_he;
+	uint64_t tls_rcd_num;
+	void *buffer;
+
+	ptag = (struct mlx5e_tls_rx_tag *)arg;
+	buffer = mlx5e_tls_rx_get_progress_buffer(ptag);
+
+	MLX5E_TLS_RX_TAG_LOCK(ptag);
+
+	ptag->tcp_resync_pending = 0;
+
+	switch (MLX5_GET(tls_progress_params, buffer, record_tracker_state)) {
+	case MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING:
+		break;
+	default:
+		goto done;
+	}
+
+	switch (MLX5_GET(tls_progress_params, buffer, auth_state)) {
+	case MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD:
+		break;
+	default:
+		goto done;
+	}
+
+	tcp_curr_sn_he = MLX5_GET(tls_progress_params, buffer, hw_resync_tcp_sn);
+
+	if (mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd(ptag, tcp_curr_sn_he,
+	    &tcp_next_sn_he, &tls_rcd_num)) {
+
+		MLX5_SET64(sw_tls_rx_cntx, ptag->crypto_params,
+		    param.initial_record_number, tls_rcd_num);
+		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params,
+		    param.resync_tcp_sn, tcp_curr_sn_he);
+
+		iq = mlx5e_tls_rx_get_iq(
+		    container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx),
+		    ptag->flowid, ptag->flowtype);
+
+		if (mlx5e_tls_rx_send_static_parameters(iq, ptag) != 0)
+			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
+	}
+done:
+	MLX5E_TLS_RX_TAG_UNLOCK(ptag);
+}
+
+/*
+ * This function queries the hardware for the current state of the TIR
+ * in question. It is typically called when encrypted data is received
+ * to re-establish hardware decryption of received TLS data.
+ *
+ * Zero is returned upon success, else some error happened.
+ */
+static int
+mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag)
+{
+	struct mlx5e_get_tls_progress_params_wqe *wqe;
+	const u32 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
+	u64 dma_address;
+	int pi;
+
+	mtx_lock(&iq->lock);
+	pi = mlx5e_iq_get_producer_index(iq);
+	if (pi < 0) {
+		mtx_unlock(&iq->lock);
+		return (-ENOMEM);
+	}
+
+	mlx5e_iq_load_memory_single(iq, pi,
+	    mlx5e_tls_rx_get_progress_buffer(ptag),
+	    MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE,
+	    &dma_address, BUS_DMASYNC_PREREAD);
+
+	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
+	    MLX5_OPCODE_GET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS << 24));
+	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	wqe->psv.num_psv = 1 << 4;
+	wqe->psv.l_key = iq->mkey_be;
+	wqe->psv.psv_index[0] = cpu_to_be32(ptag->tirn);
+	wqe->psv.va = cpu_to_be64(dma_address);
+
+	/* copy data for doorbell */
+	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
+
+	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+	iq->data[pi].p_refcount = &ptag->refs;
+	iq->data[pi].callback = &mlx5e_tls_rx_receive_progress_parameters_cb;
+	iq->data[pi].arg = ptag;
+	atomic_add_int(&ptag->refs, 1);
+	iq->pc += iq->data[pi].num_wqebbs;
+
+	mlx5e_iq_notify_hw(iq);
+
+	mtx_unlock(&iq->lock);
+
+	return (0);	/* success */
+}
+
+/*
+ * This is the import function for TLS RX tags.
+ */
+static int
+mlx5e_tls_rx_tag_import(void *arg, void **store, int cnt, int domain, int flags)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+	int i;
+
+	for (i = 0; i != cnt; i++) {
+		ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS_RX,
+		    mlx5_dev_domainset(arg), flags | M_ZERO);
+		mtx_init(&ptag->mtx, "mlx5-tls-rx-tag-mtx", NULL, MTX_DEF);
+		INIT_WORK(&ptag->work, mlx5e_tls_rx_work);
+		store[i] = ptag;
+	}
+	return (i);
+}
+
+/*
+ * This is the release function for TLS RX tags.
+ */
+static void
+mlx5e_tls_rx_tag_release(void *arg, void **store, int cnt)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+	int i;
+
+	for (i = 0; i != cnt; i++) {
+		ptag = store[i];
+
+		flush_work(&ptag->work);
+		mtx_destroy(&ptag->mtx);
+		free(ptag, M_MLX5E_TLS_RX);
+	}
+}
+
+/*
+ * This is a convenience function to free TLS RX tags. It resets some
+ * selected fields, updates the number of resources and returns the
+ * TLS RX tag to the UMA pool of free tags.
+ */
+static void
+mlx5e_tls_rx_tag_zfree(struct mlx5e_tls_rx_tag *ptag)
+{
+	/* reset some variables */
+	ptag->state = MLX5E_TLS_RX_ST_INIT;
+	ptag->dek_index = 0;
+	ptag->dek_index_ok = 0;
+	ptag->tirn = 0;
+	ptag->flow_rule = NULL;
+	ptag->tcp_resync_active = 0;
+	ptag->tcp_resync_pending = 0;
+
+	/* avoid leaking keys */
+	memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params));
+
+	/* update number of resources in use */
+	atomic_add_32(&ptag->tls_rx->num_resources, -1U);
+
+	/* return tag to UMA */
+	uma_zfree(ptag->tls_rx->zone, ptag);
+}
+
+/*
+ * This function enables TLS RX support for the given NIC, if all
+ * needed firmware capabilities are present.
+ */
+int
+mlx5e_tls_rx_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tls_rx *ptls = &priv->tls_rx;
+	struct sysctl_oid *node;
+	uint32_t x;
+
+	if (MLX5_CAP_GEN(priv->mdev, tls_rx) == 0 ||
+	    MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0 ||
+	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version) == 0)
+		return (0);
+
+	ptls->wq = create_singlethread_workqueue("mlx5-tls-rx-wq");
+	if (ptls->wq == NULL)
+		return (ENOMEM);
+
+	sysctl_ctx_init(&ptls->ctx);
+
+	snprintf(ptls->zname, sizeof(ptls->zname),
+	    "mlx5_%u_tls_rx", device_get_unit(priv->mdev->pdev->dev.bsddev));
+
+	ptls->zone = uma_zcache_create(ptls->zname,
+	    sizeof(struct mlx5e_tls_rx_tag), NULL, NULL, NULL, NULL,
+	    mlx5e_tls_rx_tag_import, mlx5e_tls_rx_tag_release, priv->mdev, 0);
+
+	/* shared between RX and TX TLS */
+	ptls->max_resources = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
+
+	for (x = 0; x != MLX5E_TLS_RX_STATS_NUM; x++)
+		ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK);
+
+	ptls->init = 1;
+
+	node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
+	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+	    "tls_rx", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Hardware TLS receive offload");
+	if (node == NULL)
+		return (0);
+
+	mlx5e_create_counter_stats(&ptls->ctx,
+	    SYSCTL_CHILDREN(node), "stats",
+	    mlx5e_tls_rx_stats_desc, MLX5E_TLS_RX_STATS_NUM,
+	    ptls->stats.arg);
+
+	return (0);
+}
+
+/*
+ * This function disables TLS RX support for the given NIC.
+ */
+void
+mlx5e_tls_rx_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tls_rx *ptls = &priv->tls_rx;
+	uint32_t x;
+
+	if (ptls->init == 0)
+		return;
+
+	ptls->init = 0;
+	flush_workqueue(ptls->wq);
+	sysctl_ctx_free(&ptls->ctx);
+	uma_zdestroy(ptls->zone);
+	destroy_workqueue(ptls->wq);
+
+	/* check if all resources are freed */
+	MPASS(priv->tls_rx.num_resources == 0);
+
+	for (x = 0; x != MLX5E_TLS_RX_STATS_NUM; x++)
+		counter_u64_free(ptls->stats.arg[x]);
+}
+
+/*
+ * This function is used to serialize sleeping firmware operations
+ * needed in order to establish and destroy a TLS RX tag.
+ */
+static void
+mlx5e_tls_rx_work(struct work_struct *work)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+	struct mlx5e_priv *priv;
+	int err;
+
+	ptag = container_of(work, struct mlx5e_tls_rx_tag, work);
+	priv = container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx);
+
+	switch (ptag->state) {
+	case MLX5E_TLS_RX_ST_INIT:
+		/* try to allocate new TIR context */
+		err = mlx5_tls_open_tir(priv->mdev, priv->tdn,
+		    priv->channel[mlx5e_tls_rx_get_ch(priv, ptag->flowid, ptag->flowtype)].rqtn,
+		    &ptag->tirn);
+		if (err) {
+			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
+			break;
+		}
+		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, progress.pd, ptag->tirn);
+
+		/* try to allocate a DEK context ID */
+		err = mlx5_encryption_key_create(priv->mdev, priv->pdn,
+		    MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, key.key_data),
+		    MLX5_GET(sw_tls_rx_cntx, ptag->crypto_params, key.key_len),
+		    &ptag->dek_index);
+		if (err) {
+			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
+			break;
+		}
+
+		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index);
+
+		ptag->dek_index_ok = 1;
+
+		MLX5E_TLS_RX_TAG_LOCK(ptag);
+		if (ptag->state == MLX5E_TLS_RX_ST_INIT)
+			ptag->state = MLX5E_TLS_RX_ST_SETUP;
+		MLX5E_TLS_RX_TAG_UNLOCK(ptag);
+		break;
+
+	case MLX5E_TLS_RX_ST_FREED:
+		/* remove flow rule for incoming traffic, if any */
+		if (ptag->flow_rule != NULL)
+			mlx5e_accel_fs_del_inpcb(ptag->flow_rule);
+
+		/* wait for all refs to go away */
+		while (ptag->refs != 0)
+			msleep(1);
+
+		/* try to destroy DEK context by ID */
+		if (ptag->dek_index_ok)
+			mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index);
+
+		/* try to destroy TIR context by ID */
+		if (ptag->tirn != 0)
+			mlx5_tls_close_tir(priv->mdev, ptag->tirn);
+
+		/* free tag */
+		mlx5e_tls_rx_tag_zfree(ptag);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * This function translates the crypto parameters into the format used
+ * by the firmware and hardware. Currently only AES-128 and AES-256
+ * are supported for TLS v1.2 and TLS v1.3.
+ *
+ * Returns zero on success, else an error happened.
+ */
+static int
+mlx5e_tls_rx_set_params(void *ctx, struct inpcb *inp, const struct tls_session_params *en)
+{
+	uint32_t tcp_sn_he;
+	uint64_t tls_sn_he;
+
+	MLX5_SET(sw_tls_rx_cntx, ctx, param.const_2, 2);
+	if (en->tls_vminor == TLS_MINOR_VER_TWO)
+		MLX5_SET(sw_tls_rx_cntx, ctx, param.tls_version, 2); /* v1.2 */
+	else
+		MLX5_SET(sw_tls_rx_cntx, ctx, param.tls_version, 3); /* v1.3 */
+	MLX5_SET(sw_tls_rx_cntx, ctx, param.const_1, 1);
+	MLX5_SET(sw_tls_rx_cntx, ctx, param.encryption_standard, 1); /* TLS */
+
+	/* copy the initial vector in place */
+	switch (en->iv_len) {
+	case MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.gcm_iv):
+	case MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.gcm_iv) +
+	     MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.implicit_iv):
+		memcpy(MLX5_ADDR_OF(sw_tls_rx_cntx, ctx, param.gcm_iv),
+		    en->iv, en->iv_len);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if (en->cipher_key_len <= MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, key.key_data)) {
+		memcpy(MLX5_ADDR_OF(sw_tls_rx_cntx, ctx, key.key_data),
+		    en->cipher_key, en->cipher_key_len);
+		MLX5_SET(sw_tls_rx_cntx, ctx, key.key_len, en->cipher_key_len);
+	} else {
+		return (EINVAL);
+	}
+
+	if (__predict_false(inp == NULL ||
+	    ktls_get_rx_sequence(inp, &tcp_sn_he, &tls_sn_he) != 0))
+		return (EINVAL);
+
+	MLX5_SET64(sw_tls_rx_cntx, ctx, param.initial_record_number, tls_sn_he);
+	MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, tcp_sn_he);
+
+	return (0);
+}
+
+/* Verify zero default */
+CTASSERT(MLX5E_TLS_RX_ST_INIT == 0);
+
+/*
+ * This function is responsible for allocating a TLS RX tag. It is a
+ * callback function invoked by the network stack.
+ *
+ * Returns zero on success, else an error happened.
+ */
+int
+mlx5e_tls_rx_snd_tag_alloc(struct ifnet *ifp,
+    union if_snd_tag_alloc_params *params,
+    struct m_snd_tag **ppmt)
+{
+	struct mlx5e_iq *iq;
+	struct mlx5e_priv *priv;
+	struct mlx5e_tls_rx_tag *ptag;
+	struct mlx5_flow_rule *flow_rule;
+	const struct tls_session_params *en;
+	uint32_t value;
+	int error;
+
+	priv = ifp->if_softc;
+
+	if (unlikely(priv->gone != 0 || priv->tls_rx.init == 0 ||
+	    params->hdr.flowtype == M_HASHTYPE_NONE))
+		return (EOPNOTSUPP);
+
+	/* allocate new tag from zone, if any */
+	ptag = uma_zalloc(priv->tls_rx.zone, M_NOWAIT);
+	if (ptag == NULL)
+		return (ENOMEM);
+
+	/* sanity check default values */
+	MPASS(ptag->state == MLX5E_TLS_RX_ST_INIT);
+	MPASS(ptag->dek_index == 0);
+	MPASS(ptag->dek_index_ok == 0);
+
+	/* setup TLS RX tag */
+	ptag->tls_rx = &priv->tls_rx;
+	ptag->flowtype = params->hdr.flowtype;
+	ptag->flowid = params->hdr.flowid;
+
+	value = atomic_fetchadd_32(&priv->tls_rx.num_resources, 1U);
+
+	/* check resource limits */
+	if (value >= priv->tls_rx.max_resources) {
+		error = ENOMEM;
+		goto failure;
+	}
*** 460 LINES SKIPPED ***