git: 68ba38dad388 - main - amd64: add pcb_tlsbase

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Wed, 28 May 2025 11:11:40 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=68ba38dad388b566877ba79ff02ed12a53b0ff3f

commit 68ba38dad388b566877ba79ff02ed12a53b0ff3f
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-05-19 23:56:37 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-05-28 11:11:23 +0000

    amd64: add pcb_tlsbase
    
    This is the TLS base value that was explicitly set by the user.
    It is forced into the fsbase register on entry to a signal handler, making
    TLS usable in the handler even if userspace uses segmentation.
    
    Reported and tested:    Alex S <iwtcex@gmail.com>
    Tested by:      pho
    Reviewed by:    olce
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D50482
---
 sys/amd64/amd64/exec_machdep.c     |  6 ++++--
 sys/amd64/amd64/sys_machdep.c      | 15 +++++++++++++++
 sys/amd64/amd64/vm_machdep.c       |  8 ++++----
 sys/amd64/ia32/ia32_signal.c       |  1 +
 sys/amd64/include/pcb.h            |  4 +++-
 sys/amd64/linux/linux_sysvec.c     |  2 +-
 sys/amd64/linux32/linux32_sysvec.c |  7 ++++---
 sys/x86/include/sysarch.h          |  2 ++
 8 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
index 8361f9fcfcab..46e4f0c9e07b 100644
--- a/sys/amd64/amd64/exec_machdep.c
+++ b/sys/amd64/amd64/exec_machdep.c
@@ -209,6 +209,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
+	if ((pcb->pcb_flags & PCB_TLSBASE) != 0)
+		pcb->pcb_fsbase = pcb->pcb_tlsbase;
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
@@ -379,9 +381,9 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 		user_ldt_free(td);
 
 	update_pcb_bases(pcb);
-	pcb->pcb_fsbase = 0;
+	pcb->pcb_fsbase = pcb->pcb_tlsbase = 0;
 	pcb->pcb_gsbase = 0;
-	clear_pcb_flags(pcb, PCB_32BIT);
+	clear_pcb_flags(pcb, PCB_32BIT | PCB_TLSBASE);
 	pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
 
 	saved_rflags = regs->tf_rflags & PSL_T;
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index 70a369ec64a3..6deebba5babc 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -206,6 +206,8 @@ sysarch(struct thread *td, struct sysarch_args *uap)
 	case AMD64_GET_XFPUSTATE:
 	case AMD64_SET_PKRU:
 	case AMD64_CLEAR_PKRU:
+	case AMD64_GET_TLSBASE:
+	case AMD64_SET_TLSBASE:
 		break;
 
 	case I386_SET_IOPERM:
@@ -311,14 +313,27 @@ sysarch(struct thread *td, struct sysarch_args *uap)
 		error = copyout(&pcb->pcb_fsbase, uap->parms,
 		    sizeof(pcb->pcb_fsbase));
 		break;
+	case AMD64_GET_TLSBASE:
+		if ((pcb->pcb_flags & PCB_TLSBASE) == 0) {
+			error = ESRCH;
+		} else {
+			error = copyout(&pcb->pcb_tlsbase, uap->parms,
+			    sizeof(pcb->pcb_tlsbase));
+		}
+		break;
 
 	case AMD64_SET_FSBASE:
+	case AMD64_SET_TLSBASE:
 		error = copyin(uap->parms, &a64base, sizeof(a64base));
 		if (error == 0) {
 			if (a64base < curproc->p_sysent->sv_maxuser) {
 				set_pcb_flags(pcb, PCB_FULL_IRET);
 				pcb->pcb_fsbase = a64base;
 				td->td_frame->tf_fs = _ufssel;
+				if (uap->op == AMD64_SET_TLSBASE) {
+					pcb->pcb_tlsbase = a64base;
+					set_pcb_flags(pcb, PCB_TLSBASE);
+				}
 			} else
 				error = EINVAL;
 		}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 2b280e960b2e..4001f40554af 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -156,7 +156,7 @@ copy_thread(struct thread *td1, struct thread *td2)
 
 	/* Kernel threads start with clean FPU and segment bases. */
 	if ((td2->td_pflags & TDP_KTHREAD) != 0) {
-		pcb2->pcb_fsbase = 0;
+		pcb2->pcb_fsbase = pcb2->pcb_tlsbase = 0;
 		pcb2->pcb_gsbase = 0;
 		clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE |
 		    PCB_KERNFPU | PCB_KERNFPU_THR);
@@ -182,7 +182,7 @@ copy_thread(struct thread *td1, struct thread *td2)
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
-	 * pcb2->pcb_[fg]sbase:	cloned above
+	 * pcb2->pcb_{fs,gs,tls}base:	cloned above
 	 */
 
 	pcb2->pcb_tssp = NULL;
@@ -663,14 +663,14 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
 		return (EINVAL);
 
 	pcb = td->td_pcb;
-	set_pcb_flags(pcb, PCB_FULL_IRET);
+	set_pcb_flags(pcb, PCB_FULL_IRET | PCB_TLSBASE);
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		pcb->pcb_gsbase = (register_t)tls_base;
 		return (0);
 	}
 #endif
-	pcb->pcb_fsbase = (register_t)tls_base;
+	pcb->pcb_fsbase = pcb->pcb_tlsbase = (register_t)tls_base;
 	return (0);
 }
 
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index dea12c5346a3..54e170450dba 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -958,4 +958,5 @@ ia32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 
 	/* Return via doreti so that we can change to a different %cs */
 	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
+	clear_pcb_flags(pcb, PCB_TLSBASE);
 }
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 1caa33ac0019..27e1dce08ee1 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -84,6 +84,7 @@ struct pcb {
 #define	PCB_KERNFPU_THR	0x0020	/* fpu_kern_thread() */
 #define	PCB_32BIT	0x0040	/* process has 32 bit context (segs etc) */
 #define	PCB_FPUNOSAVE	0x0080	/* no save area for current FPU ctx */
+#define	PCB_TLSBASE	0x0100	/* tlsbase was set */
 
 	uint16_t	pcb_initial_fpucw;
 
@@ -104,7 +105,8 @@ struct pcb {
 
 	struct savefpu	*pcb_save;
 
-	uint64_t	pcb_pad[5];
+	register_t	pcb_tlsbase;	/* not same as pcb_fsbase */
+	uint64_t	pcb_pad[4];
 };
 
 /* Per-CPU state saved during suspend and resume. */
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 9c3d7e6405c1..3760a5455a73 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -247,7 +247,7 @@ linux_exec_setregs(struct thread *td, struct image_params *imgp,
 
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
-	clear_pcb_flags(pcb, PCB_32BIT);
+	clear_pcb_flags(pcb, PCB_32BIT | PCB_TLSBASE);
 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 19b4af7661f0..a8cc03ea8c71 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -580,6 +580,10 @@ linux_exec_setregs(struct thread *td, struct image_params *imgp,
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 
+	/* Do full restore on return so that we can change to a different %cs */
+	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
+	clear_pcb_flags(pcb, PCB_TLSBASE);
+
 	critical_enter();
 	wrmsr(MSR_FSBASE, 0);
 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
@@ -605,9 +609,6 @@ linux_exec_setregs(struct thread *td, struct image_params *imgp,
 	x86_clear_dbregs(pcb);
 
 	fpstate_drop(td);
-
-	/* Do full restore on return so that we can change to a different %cs */
-	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
 }
 
 /*
diff --git a/sys/x86/include/sysarch.h b/sys/x86/include/sysarch.h
index 3226f3b9d93a..61f559f4bd23 100644
--- a/sys/x86/include/sysarch.h
+++ b/sys/x86/include/sysarch.h
@@ -61,6 +61,8 @@
 #define	AMD64_GET_XFPUSTATE	132
 #define	AMD64_SET_PKRU		133
 #define	AMD64_CLEAR_PKRU	134
+#define	AMD64_GET_TLSBASE	135
+#define	AMD64_SET_TLSBASE	136
 
 /* Flags for AMD64_SET_PKRU */
 #define	AMD64_PKRU_EXCL		0x0001