git: 6698c1a28d09 - stable/13 - linux(4): Allow musl brand to use FUTEX_REQUEUE op.

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Fri, 17 Jun 2022 19:37:09 UTC
The branch stable/13 has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=6698c1a28d09b58fbb0921ec727b9917fd5d2b7a

commit 6698c1a28d09b58fbb0921ec727b9917fd5d2b7a
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2021-07-20 11:39:20 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2022-06-17 19:33:12 +0000

    linux(4): Allow musl brand to use FUTEX_REQUEUE op.
    
    The initial patch from the submitter was adapted by me to prevent
    unconditional use of FUTEX_REQUEUE.
    
    PR:                     255947
    Submitted by:           Philippe Michaud-Boudreault
    Differential Revision:  https://reviews.freebsd.org/D30332
    
    (cherry picked from commit cf8d74e3fe63da7359d6ca7e0b4b57c5684c2845)
---
 sys/amd64/linux/linux_sysvec.c     |  3 ++-
 sys/amd64/linux32/linux32_sysvec.c |  3 ++-
 sys/compat/linux/linux.h           |  5 ++++
 sys/compat/linux/linux_futex.c     | 49 +++++++++++++++++++++++++-------------
 sys/i386/linux/linux_sysvec.c      |  3 ++-
 5 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index e098779e518e..08b9a5f1650f 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -985,7 +985,8 @@ static Elf64_Brandinfo linux_muslbrand = {
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
-	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
+			    LINUX_BI_FUTEX_REQUEUE
 };
 
 Elf64_Brandinfo *linux_brandlist[] = {
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 9e77c5e8ac7d..77d6c612c359 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -1145,7 +1145,8 @@ static Elf32_Brandinfo linux_muslbrand = {
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
-	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
+			    LINUX_BI_FUTEX_REQUEUE
 };
 
 Elf32_Brandinfo *linux_brandlist[] = {
diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h
index 5656b67251ff..2ec12fd64d58 100644
--- a/sys/compat/linux/linux.h
+++ b/sys/compat/linux/linux.h
@@ -30,6 +30,11 @@
 
 #include <sys/queue.h>
 
+/*
+ * Private Brandinfo flags
+ */
+#define	LINUX_BI_FUTEX_REQUEUE	0x01000000
+
 /*
  * poll()
  */
diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c
index 1ba11ed3fc57..2ee18c26cf24 100644
--- a/sys/compat/linux/linux_futex.c
+++ b/sys/compat/linux/linux_futex.c
@@ -46,6 +46,7 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $")
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/imgact.h>
+#include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -234,6 +235,7 @@ struct linux_futex_args {
 	struct timespec	*ts;
 	uint32_t	*uaddr2;
 	uint32_t	val3;
+	bool		val3_compare;
 	struct timespec	kts;
 };
 
@@ -648,6 +650,7 @@ static int
 linux_futex(struct thread *td, struct linux_futex_args *args)
 {
 	struct linux_pemuldata *pem;
+	struct proc *p;
 
 	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
 		args->flags = 0;
@@ -695,6 +698,33 @@ linux_futex(struct thread *td, struct linux_futex_args *args)
 
 		return (linux_futex_wake(td, args));
 
+	case LINUX_FUTEX_REQUEUE:
+		/*
+		 * Glibc does not use this operation since version 2.3.3,
+		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
+		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
+		 * FUTEX_REQUEUE returned EINVAL.
+		 */
+		pem = pem_find(td->td_proc);
+		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
+			linux_msg(td, "unsupported FUTEX_REQUEUE");
+			pem->flags |= LINUX_XDEPR_REQUEUEOP;
+			LIN_SDT_PROBE0(futex, linux_futex,
+			    deprecated_requeue);
+		}
+
+		/*
+		 * The above is true; however, musl libc does make use of the
+		 * futex requeue operation, so allow the operation for brands
+		 * that set the LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags.
+		 */
+		p = td->td_proc;
+		Elf_Brandinfo *bi = p->p_elf_brandinfo;
+		if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0)
+			return (EINVAL);
+		args->val3_compare = false;
+		/* FALLTHROUGH */
+
 	case LINUX_FUTEX_CMP_REQUEUE:
 		LIN_SDT_PROBE5(futex, linux_futex, debug_cmp_requeue,
 		    args->uaddr, args->val, args->val3, args->uaddr2,
@@ -749,22 +779,6 @@ linux_futex(struct thread *td, struct linux_futex_args *args)
 		}
 		return (ENOSYS);
 
-	case LINUX_FUTEX_REQUEUE:
-		/*
-		 * Glibc does not use this operation since version 2.3.3,
-		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
-		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
-		 * FUTEX_REQUEUE returned EINVAL.
-		 */
-		pem = pem_find(td->td_proc);
-		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
-			linux_msg(td, "unsupported FUTEX_REQUEUE");
-			pem->flags |= LINUX_XDEPR_REQUEUEOP;
-			LIN_SDT_PROBE0(futex, linux_futex,
-			    deprecated_requeue);
-		}
-		return (EINVAL);
-
 	case LINUX_FUTEX_WAIT_REQUEUE_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
@@ -921,7 +935,7 @@ retry:
 		    error);
 		return (error);
 	}
-	if (val != args->val3) {
+	if (args->val3_compare == true && val != args->val3) {
 		LIN_SDT_PROBE2(futex, linux_futex,
 		    debug_cmp_requeue_value_neq, args->val, val);
 		LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
@@ -1016,6 +1030,7 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
 		.ts = NULL,
 		.uaddr2 = args->uaddr2,
 		.val3 = args->val3,
+		.val3_compare = true,
 	};
 	struct l_timespec lts;
 	int error;
diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c
index 15fec674f8c5..3a12f4eb7689 100644
--- a/sys/i386/linux/linux_sysvec.c
+++ b/sys/i386/linux/linux_sysvec.c
@@ -1096,7 +1096,8 @@ static Elf32_Brandinfo linux_muslbrand = {
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
-	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
+			    LINUX_BI_FUTEX_REQUEUE
 };
 
 Elf32_Brandinfo *linux_brandlist[] = {