git: 7148611e4f56 - stable/13 - vmm: avoid spurious rendezvous

From: Corvin Köhne <corvink_at_FreeBSD.org>
Date: Wed, 08 Feb 2023 10:40:22 UTC
The branch stable/13 has been updated by corvink:

URL: https://cgit.FreeBSD.org/src/commit/?id=7148611e4f560f375d4b92fdeb9451a792dc73fc

commit 7148611e4f560f375d4b92fdeb9451a792dc73fc
Author:     Corvin Köhne <corvink@FreeBSD.org>
AuthorDate: 2022-11-15 10:53:49 +0000
Commit:     Corvin Köhne <corvink@FreeBSD.org>
CommitDate: 2023-02-08 09:28:47 +0000

    vmm: avoid spurious rendezvous
    
    A vcpu only checks if a rendezvous is in progress or not to decide if it
    should handle a rendezvous. This could lead to spurious rendezvous where
    a vcpu tries to handle a rendezvous it isn't part of. This situation is
    properly handled by vm_handle_rendezvous but it could potentially
    degrade the performance. Avoid that by an early check if the vcpu is
    part of the rendezvous or not.
    
    At the moment, rendezvous are only used to spin up application
    processors and to send ioapic interrupts. Spinning up application
    processors is done in the guest boot phase by sending INIT SIPI
    sequences to single vcpus. This is known to cause spurious rendezvous
    and only occurs in the boot phase. Sending ioapic interrupts is rare
    because modern guests will use MSI and the rendezvous is always sent to
    all vcpus.
    
    Reviewed by:            jhb
    MFC after:              1 week
    Sponsored by:           Beckhoff Automation GmbH & Co. KG
    Differential Revision:  https://reviews.freebsd.org/D37390
    
    (cherry picked from commit 892feec2211d0dbd58252a34d78dbcb2d5dd7593)
---
 sys/amd64/include/vmm.h   | 14 ++++++++++----
 sys/amd64/vmm/amd/svm.c   |  2 +-
 sys/amd64/vmm/intel/vmx.c |  2 +-
 sys/amd64/vmm/vmm.c       |  3 ++-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 15f946d4f120..cb883bb0e143 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -159,7 +159,7 @@ struct pmap;
 enum snapshot_req;
 
 struct vm_eventinfo {
-	void	*rptr;		/* rendezvous cookie */
+	cpuset_t *rptr;		/* rendezvous cookie */
 	int	*sptr;		/* suspend cookie */
 	int	*iptr;		/* reqidle cookie */
 };
@@ -331,10 +331,16 @@ void vm_await_start(struct vm *vm, const cpuset_t *waiting);
 #endif	/* _SYS__CPUSET_H_ */
 
 static __inline int
-vcpu_rendezvous_pending(struct vm_eventinfo *info)
+vcpu_rendezvous_pending(struct vcpu *vcpu, struct vm_eventinfo *info)
 {
-
-	return (*((uintptr_t *)(info->rptr)) != 0);
+	/*
+	 * This check isn't done with atomic operations or under a lock because
+	 * there's no need to. If the vcpuid bit is set, the vcpu is part of a
+	 * rendezvous and the bit won't be cleared until the vcpu enters the
+	 * rendezvous. On rendezvous exit, the cpuset is cleared and the vcpu
+	 * will see an empty cpuset. So, the races are harmless.
+	 */
+	return (CPU_ISSET(vcpu_vcpuid(vcpu), info->rptr));
 }
 
 static __inline int
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 2448501401e3..ee1154ef85b6 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -2053,7 +2053,7 @@ svm_run(void *vcpui, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo)
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(evinfo)) {
+		if (vcpu_rendezvous_pending(vcpu->vcpu, evinfo)) {
 			enable_gintr();
 			vm_exit_rendezvous(vcpu->vcpu, state->rip);
 			break;
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 4b65e254cc91..fa94c707001c 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -3071,7 +3071,7 @@ vmx_run(void *vcpui, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo)
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(evinfo)) {
+		if (vcpu_rendezvous_pending(vcpu->vcpu, evinfo)) {
 			enable_intr();
 			vm_exit_rendezvous(vcpu->vcpu, rip);
 			break;
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index f0ee159dcbb9..123d35a983c0 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1438,6 +1438,7 @@ vm_handle_rendezvous(struct vcpu *vcpu)
 		if (CPU_CMP(&vm->rendezvous_req_cpus,
 		    &vm->rendezvous_done_cpus) == 0) {
 			VMM_CTR0(vcpu, "Rendezvous completed");
+			CPU_ZERO(&vm->rendezvous_req_cpus);
 			vm->rendezvous_func = NULL;
 			wakeup(&vm->rendezvous_func);
 			break;
@@ -1858,7 +1859,7 @@ vm_run(struct vcpu *vcpu, struct vm_exit *vme_user)
 
 	pmap = vmspace_pmap(vm->vmspace);
 	vme = &vcpu->exitinfo;
-	evinfo.rptr = &vm->rendezvous_func;
+	evinfo.rptr = &vm->rendezvous_req_cpus;
 	evinfo.sptr = &vm->suspend;
 	evinfo.iptr = &vcpu->reqidle;
 restart: