git: 7b3bc182d0e6 - stable/13 - mlx5en: Improve RX- and TX- TLS refcounting.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Thu, 24 Feb 2022 10:04:24 UTC
The branch stable/13 has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=7b3bc182d0e636f3a674657c0563678582bc47bb

commit 7b3bc182d0e636f3a674657c0563678582bc47bb
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-24 09:59:07 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-24 09:59:07 +0000

    mlx5en: Improve RX- and TX- TLS refcounting.
    
    Use the send tag refcounting mechanism to refcount the RX- and TX- TLS
    send tags. It is then no longer necessary to wait for the refcounts
    held by pending data or WQE commands to reach zero when destroying
    RX- and TX- TLS send tags.
    
    This also ensures that when TX-TLS and rate limiting are used at the
    same time, the underlying SQ is not prematurely destroyed.
    
    Sponsored by:   NVIDIA Networking
    
    (cherry picked from commit ebdb70064900a2ba2e3f8341328edc34e619170d)
---
 sys/dev/mlx5/mlx5_en/en.h             |  5 +++--
 sys/dev/mlx5/mlx5_en/en_hw_tls.h      |  2 +-
 sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 23 ++++++++++-------------
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c   |  9 +++++----
 sys/dev/mlx5/mlx5_en/mlx5_en_tx.c     | 26 +++++++++++++++-----------
 5 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 28928aa18fa1..424d7f66b056 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -829,7 +830,7 @@ struct mlx5e_iq {
 struct mlx5e_sq_mbuf {
 	bus_dmamap_t dma_map;
 	struct mbuf *mbuf;
-	volatile s32 *p_refcount;	/* in use refcount, if any */
+	struct m_snd_tag *mst;	/* if set, unref this send tag on completion */
 	u32	num_bytes;
 	u32	num_wqebbs;
 };
@@ -1043,7 +1044,7 @@ struct mlx5e_flow_tables {
 };
 
 struct mlx5e_xmit_args {
-	volatile s32 *pref;
+	struct m_snd_tag *mst;
 	u32 tisn;
 	u16 ihs;
 };
diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
index 563c5304dd30..7ba00fc1f363 100644
--- a/sys/dev/mlx5/mlx5_en/en_hw_tls.h
+++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -44,7 +45,6 @@ enum {
 struct mlx5e_tls;
 struct mlx5e_tls_tag {
 	struct m_snd_tag tag;
-	volatile s32 refs;	/* number of pending mbufs */
 	uint32_t tisn;		/* HW TIS context number */
 	uint32_t dek_index;	/* HW TLS context number */
 	struct mlx5e_tls *tls;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
index 57d77cc897db..e28fddcfd116 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
@@ -1,5 +1,6 @@
 /*-
- * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2019-2021 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -225,10 +226,6 @@ mlx5e_tls_work(struct work_struct *work)
 		break;
 
 	case MLX5E_TLS_ST_FREED:
-		/* wait for all refs to go away */
-		while (ptag->refs != 0)
-			msleep(1);
-
 		/* try to destroy DEK context by ID */
 		if (ptag->dek_index_ok)
 			err = mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index);
@@ -519,8 +516,8 @@ mlx5e_tls_send_static_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag
 	sq->mbuf[pi].mbuf = NULL;
 	sq->mbuf[pi].num_bytes = 0;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	sq->mbuf[pi].p_refcount = &ptag->refs;
-	atomic_add_int(&ptag->refs, 1);
+	sq->mbuf[pi].mst = m_snd_tag_ref(&ptag->tag);
+
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 }
 
@@ -557,8 +554,8 @@ mlx5e_tls_send_progress_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *pt
 	sq->mbuf[pi].mbuf = NULL;
 	sq->mbuf[pi].num_bytes = 0;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	sq->mbuf[pi].p_refcount = &ptag->refs;
-	atomic_add_int(&ptag->refs, 1);
+	sq->mbuf[pi].mst = m_snd_tag_ref(&ptag->tag);
+
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 }
 
@@ -587,8 +584,8 @@ mlx5e_tls_send_nop(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag)
 	sq->mbuf[pi].mbuf = NULL;
 	sq->mbuf[pi].num_bytes = 0;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	sq->mbuf[pi].p_refcount = &ptag->refs;
-	atomic_add_int(&ptag->refs, 1);
+	sq->mbuf[pi].mst = m_snd_tag_ref(&ptag->tag);
+
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 }
 
@@ -768,7 +765,7 @@ mlx5e_sq_tls_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf
 
 		/* setup transmit arguments */
 		parg->tisn = ptls_tag->tisn;
-		parg->pref = &ptls_tag->refs;
+		parg->mst = &ptls_tag->tag;
 
 		/* try to send DUMP data */
 		if (mlx5e_sq_dump_xmit(sq, parg, &r_mb) != 0) {
@@ -787,7 +784,7 @@ mlx5e_sq_tls_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf
 
 	parg->tisn = ptls_tag->tisn;
 	parg->ihs = header_size;
-	parg->pref = &ptls_tag->refs;
+	parg->mst = &ptls_tag->tag;
 	return (MLX5E_TLS_CONTINUE);
 }
 
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 0cff33543cb1..1bbd7f75b1db 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -1568,14 +1569,14 @@ mlx5e_free_sq_db(struct mlx5e_sq *sq)
 	int x;
 
 	for (x = 0; x != wq_sz; x++) {
-		if (unlikely(sq->mbuf[x].p_refcount != NULL)) {
-			atomic_add_int(sq->mbuf[x].p_refcount, -1);
-			sq->mbuf[x].p_refcount = NULL;
-		}
 		if (sq->mbuf[x].mbuf != NULL) {
 			bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map);
 			m_freem(sq->mbuf[x].mbuf);
 		}
+		if (sq->mbuf[x].mst != NULL) {
+			m_snd_tag_rele(sq->mbuf[x].mst);
+			sq->mbuf[x].mst = NULL;
+		}
 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
 	}
 	free(sq->mbuf, M_MLX5EN);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index 4cc25e1d5b51..088b55d81dfe 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -665,8 +666,7 @@ mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbu
 
 	/* store pointer to mbuf */
 	sq->mbuf[pi].mbuf = mb;
-	sq->mbuf[pi].p_refcount = parg->pref;
-	atomic_add_int(parg->pref, 1);
+	sq->mbuf[pi].mst = m_snd_tag_ref(parg->mst);
 
 	/* count all traffic going out */
 	sq->stats.packets++;
@@ -996,9 +996,11 @@ top:
 	/* Store pointer to mbuf */
 	sq->mbuf[pi].mbuf = mb;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	sq->mbuf[pi].p_refcount = args.pref;
-	if (unlikely(args.pref != NULL))
-		atomic_add_int(args.pref, 1);
+	if (unlikely(args.mst != NULL))
+		sq->mbuf[pi].mst = m_snd_tag_ref(args.mst);
+	else
+		MPASS(sq->mbuf[pi].mst == NULL);
+
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 
 	/* Count all traffic going out */
@@ -1028,6 +1030,7 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
 
 	while (budget > 0) {
 		struct mlx5_cqe64 *cqe;
+		struct m_snd_tag *mst;
 		struct mbuf *mb;
 		bool match;
 		u16 sqcc_this;
@@ -1067,13 +1070,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
 			match = (delta < sq->mbuf[ci].num_wqebbs);
 			mb = sq->mbuf[ci].mbuf;
 			sq->mbuf[ci].mbuf = NULL;
+			mst = sq->mbuf[ci].mst;
+			sq->mbuf[ci].mst = NULL;
 
-			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
-				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
-				sq->mbuf[ci].p_refcount = NULL;
-			}
-
-			if (mb == NULL) {
+			if (unlikely(mb == NULL)) {
 				if (unlikely(sq->mbuf[ci].num_bytes == 0))
 					sq->stats.nop++;
 			} else {
@@ -1084,6 +1084,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
 				/* Free transmitted mbuf */
 				m_freem(mb);
 			}
+
+			if (unlikely(mst != NULL))
+				m_snd_tag_rele(mst);
+
 			sqcc += sq->mbuf[ci].num_wqebbs;
 		}
 	}