git: 17050a2b5b07 - main - Hyper-V: vmbus: Prevent load/store reordering when access ring buffer index

From: Wei Hu <whu_at_FreeBSD.org>
Date: Wed, 21 Jun 2023 10:25:25 UTC
The branch main has been updated by whu:

URL: https://cgit.FreeBSD.org/src/commit/?id=17050a2b5b07160635aaa80b9aa4df4de500b090

commit 17050a2b5b07160635aaa80b9aa4df4de500b090
Author:     Wei Hu <whu@FreeBSD.org>
AuthorDate: 2023-06-21 09:31:46 +0000
Commit:     Wei Hu <whu@FreeBSD.org>
CommitDate: 2023-06-21 10:10:49 +0000

    Hyper-V: vmbus: Prevent load/store reordering when access ring buffer index
    
    When running VM on ARM64 Hyper-V, we have seen netvsc/hn driver hit
    assert on reading duplicated network completion packets over vmbus
    channel or one of the tx channels stalls completely. This seems to
    caused by processor reordering the instructions when vmbus driver
    reading or updating its channel ring buffer indexes.
    
    Fix this by using load acquire and store release instructions to
    enforce the order of these memory accesses.
    
    PR:             271764
    Reported by:    Souradeep Chakrabarti <schakrabarti@microsoft.com>
    Reviewed by:    Souradeep Chakrabarti <schakrabarti@microsoft.com>
    Tested by:      whu
    Sponsored by:   Microsoft
---
 sys/dev/hyperv/vmbus/vmbus_br.c    | 31 +++++++++++++++++--------------
 sys/dev/hyperv/vmbus/vmbus_brvar.h |  6 ++++--
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/sys/dev/hyperv/vmbus/vmbus_br.c b/sys/dev/hyperv/vmbus/vmbus_br.c
index 7311f87fd596..c3771f2607b9 100644
--- a/sys/dev/hyperv/vmbus/vmbus_br.c
+++ b/sys/dev/hyperv/vmbus/vmbus_br.c
@@ -142,8 +142,8 @@ vmbus_rxbr_avail(const struct vmbus_rxbr *rbr)
 	uint32_t rindex, windex;
 
 	/* Get snapshot */
-	rindex = rbr->rxbr_rindex;
-	windex = rbr->rxbr_windex;
+	rindex = atomic_load_acq_32(&rbr->rxbr_rindex);
+	windex = atomic_load_acq_32(&rbr->rxbr_windex);
 
 	return (rbr->rxbr_dsize -
 	    VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize));
@@ -289,7 +289,7 @@ vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex)
 	 * This is the only case we need to signal when the
 	 * ring transitions from being empty to non-empty.
 	 */
-	if (old_windex == tbr->txbr_rindex)
+	if (old_windex == atomic_load_acq_32(&tbr->txbr_rindex))
 		return (TRUE);
 
 	return (FALSE);
@@ -301,8 +301,8 @@ vmbus_txbr_avail(const struct vmbus_txbr *tbr)
 	uint32_t rindex, windex;
 
 	/* Get snapshot */
-	rindex = tbr->txbr_rindex;
-	windex = tbr->txbr_windex;
+	rindex = atomic_load_acq_32(&tbr->txbr_rindex);
+	windex = atomic_load_acq_32(&tbr->txbr_windex);
 
 	return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize);
 }
@@ -427,7 +427,7 @@ vmbus_txbr_write_call(struct vmbus_txbr *tbr,
 	 * is copied.
 	 */
 	__compiler_membar();
-	tbr->txbr_windex = windex;
+	atomic_store_rel_32(&tbr->txbr_windex, windex);
 
 	mtx_unlock_spin(&tbr->txbr_lock);
 
@@ -471,7 +471,7 @@ vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen,
 	}
 
 	/* Save br_windex for later use */
-	old_windex = tbr->txbr_windex;
+	old_windex = atomic_load_acq_32(&tbr->txbr_windex);
 
 	/*
 	 * Copy the scattered channel packet to the TX bufring.
@@ -494,7 +494,7 @@ vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen,
 	 * is copied.
 	 */
 	__compiler_membar();
-	tbr->txbr_windex = windex;
+	atomic_store_rel_32(&tbr->txbr_windex, windex);
 
 	mtx_unlock_spin(&tbr->txbr_lock);
 
@@ -557,7 +557,8 @@ vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
 		mtx_unlock_spin(&rbr->rxbr_lock);
 		return (EAGAIN);
 	}
-	vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
+	vmbus_rxbr_copyfrom(rbr,
+	    atomic_load_acq_32(&rbr->rxbr_rindex), data, dlen);
 
 	mtx_unlock_spin(&rbr->rxbr_lock);
 
@@ -622,10 +623,11 @@ vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
 		rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
 		    idx_adv + sizeof(uint64_t), br_dsize);
 		__compiler_membar();
-		rbr->rxbr_rindex = rindex;
+		atomic_store_rel_32(&rbr->rxbr_rindex, rindex);
 	}
 
-	vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
+	vmbus_rxbr_copyfrom(rbr,
+	    atomic_load_acq_32(&rbr->rxbr_rindex), data, dlen);
 
 	mtx_unlock_spin(&rbr->rxbr_lock);
 
@@ -667,7 +669,7 @@ vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
 	rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
 	    idx_adv + sizeof(uint64_t), br_dsize);
 	__compiler_membar();
-	rbr->rxbr_rindex = rindex;
+	atomic_store_rel_32(&rbr->rxbr_rindex, rindex);
 
 	mtx_unlock_spin(&rbr->rxbr_lock);
 
@@ -700,7 +702,8 @@ vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip)
 	/*
 	 * Copy channel packet from RX bufring.
 	 */
-	rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize);
+	rindex = VMBUS_BR_IDXINC(atomic_load_acq_32(&rbr->rxbr_rindex),
+	    skip, br_dsize);
 	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
 
 	/*
@@ -712,7 +715,7 @@ vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip)
 	 * Update the read index _after_ the channel packet is fetched.
 	 */
 	__compiler_membar();
-	rbr->rxbr_rindex = rindex;
+	atomic_store_rel_32(&rbr->rxbr_rindex, rindex);
 
 	mtx_unlock_spin(&rbr->rxbr_lock);
 
diff --git a/sys/dev/hyperv/vmbus/vmbus_brvar.h b/sys/dev/hyperv/vmbus/vmbus_brvar.h
index 95bf4338ff1c..0edcc61ae5e1 100644
--- a/sys/dev/hyperv/vmbus/vmbus_brvar.h
+++ b/sys/dev/hyperv/vmbus/vmbus_brvar.h
@@ -99,14 +99,16 @@ static __inline bool
 vmbus_txbr_empty(const struct vmbus_txbr *tbr)
 {
 
-	return (tbr->txbr_windex == tbr->txbr_rindex ? true : false);
+	return (atomic_load_acq_32(&tbr->txbr_windex) ==
+	    atomic_load_acq_32(&tbr->txbr_rindex) ? true : false);
 }
 
 static __inline bool
 vmbus_rxbr_empty(const struct vmbus_rxbr *rbr)
 {
 
-	return (rbr->rxbr_windex == rbr->rxbr_rindex ? true : false);
+	return (atomic_load_acq_32(&rbr->rxbr_windex) ==
+	    atomic_load_acq_32(&rbr->rxbr_rindex) ? true : false);
 }
 
 static __inline int