svn commit: r195486 - in head/sys/amd64: amd64 ia32 include linux32

Konstantin Belousov kib at FreeBSD.org
Thu Jul 9 09:34:12 UTC 2009


Author: kib
Date: Thu Jul  9 09:34:11 2009
New Revision: 195486
URL: http://svn.freebsd.org/changeset/base/195486

Log:
  Restore the segment registers and segment base MSRs on the amd64 syscall
  return path only when the thread was context switched while executing
  syscall code or the syscall explicitly modified the LDT or MSRs.
  
  Save segment registers in trap handlers before interrupts are enabled,
  so that a context switch cannot happen before the registers are saved.
  Use a separate byte in the pcb to indicate fast/full return, since
  pcb_flags are not synchronized with context switches.
  
  The change restores the syscall microbenchmark numbers that had slowed
  down after the commit of LDT support on amd64.
  
  Reviewed by:	jeff
  Tested (and tested, and tested ...) by:	pho
  Approved by:	re (kensmith)

Modified:
  head/sys/amd64/amd64/cpu_switch.S
  head/sys/amd64/amd64/exception.S
  head/sys/amd64/amd64/genassym.c
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/sys_machdep.c
  head/sys/amd64/amd64/vm_machdep.c
  head/sys/amd64/ia32/ia32_exception.S
  head/sys/amd64/ia32/ia32_reg.c
  head/sys/amd64/ia32/ia32_signal.c
  head/sys/amd64/include/pcb.h
  head/sys/amd64/linux32/linux32_sysvec.c

Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/cpu_switch.S	Thu Jul  9 09:34:11 2009	(r195486)
@@ -97,6 +97,7 @@ END(cpu_throw)
 ENTRY(cpu_switch)
 	/* Switch to new thread.  First, save context. */
 	movq	TD_PCB(%rdi),%r8
+	movb	$1,PCB_FULL_IRET(%r8)
 
 	movq	(%rsp),%rax			/* Hardware registers */
 	movq	%r15,PCB_R15(%r8)

Modified: head/sys/amd64/amd64/exception.S
==============================================================================
--- head/sys/amd64/amd64/exception.S	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/exception.S	Thu Jul  9 09:34:11 2009	(r195486)
@@ -162,19 +162,20 @@ IDTVEC(align)
 	.globl	alltraps
 	.type	alltraps,@function
 alltraps:
+	movq	%rdi,TF_RDI(%rsp)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	alltraps_testi		/* already running with kernel GS.base */
 	swapgs
+	movq	PCPU(CURPCB),%rdi
+	movb	$0,PCB_FULL_IRET(%rdi)
 	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
 alltraps_testi:
 	testl	$PSL_I,TF_RFLAGS(%rsp)
-	jz	alltraps_pushregs
+	jz	alltraps_pushregs_no_rdi
 	sti
-alltraps_pushregs:
-	movq	%rdi,TF_RDI(%rsp)
 alltraps_pushregs_no_rdi:
 	movq	%rsi,TF_RSI(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
@@ -233,14 +234,17 @@ calltrap:
 	.globl	alltraps_noen
 	.type	alltraps_noen,@function
 alltraps_noen:
+	movq	%rdi,TF_RDI(%rsp)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	1f	/* already running with kernel GS.base */
 	swapgs
+	movq	PCPU(CURPCB),%rdi
+	movb	$0,PCB_FULL_IRET(%rdi)
 1:	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
-	jmp	alltraps_pushregs
+	jmp	alltraps_pushregs_no_rdi
 
 IDTVEC(dblfault)
 	subq	$TF_ERR,%rsp
@@ -278,12 +282,13 @@ IDTVEC(dblfault)
 IDTVEC(page)
 	subq	$TF_ERR,%rsp
 	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
+	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	1f			/* already running with kernel GS.base */
 	swapgs
-1:
-	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
-	movq	%cr2,%rdi		/* preserve %cr2 before ..  */
+	movq	PCPU(CURPCB),%rdi
+	movb	$0,PCB_FULL_IRET(%rdi)
+1:	movq	%cr2,%rdi		/* preserve %cr2 before ..  */
 	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
 	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
@@ -311,7 +316,9 @@ IDTVEC(prot)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	2f			/* already running with kernel GS.base */
 1:	swapgs
-2:	movw	%fs,TF_FS(%rsp)
+2:	movq	PCPU(CURPCB),%rdi
+	movb	$1,PCB_FULL_IRET(%rdi)	/* always full iret from GPF */
+	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
@@ -341,6 +348,8 @@ IDTVEC(fast_syscall)
 	movw	%gs,TF_GS(%rsp)
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
+	movq	PCPU(CURPCB),%r11
+	movb	$0,PCB_FULL_IRET(%r11)
 	sti
 	movq	$KUDSEL,TF_SS(%rsp)
 	movq	$KUCSEL,TF_CS(%rsp)
@@ -644,7 +653,8 @@ doreti_exit:
 	 */
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jz	ld_regs
-
+	cmpb	$0,PCB_FULL_IRET(%r8)
+	je	ld_regs
 	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
 	je	set_segs
 

Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/genassym.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -141,6 +141,7 @@ ASSYM(PCB_DR3, offsetof(struct pcb, pcb_
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
+ASSYM(PCB_FULL_IRET, offsetof(struct pcb, pcb_full_iret));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_32BIT, PCB_32BIT);
 ASSYM(PCB_GS32BIT, PCB_GS32BIT);

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/machdep.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
+	td->td_pcb->pcb_full_iret = 1;
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
@@ -483,6 +484,7 @@ sigreturn(td, uap)
 	signotify(td);
 	PROC_UNLOCK(p);
 	td->td_pcb->pcb_flags |= PCB_FULLCTX;
+	td->td_pcb->pcb_full_iret = 1;
 	return (EJUSTRETURN);
 }
 
@@ -853,6 +855,7 @@ exec_setregs(td, entry, stack, ps_string
 	pcb->pcb_gsbase = 0;
 	pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
 	pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
+	pcb->pcb_full_iret = 1;
 
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_rip = entry;
@@ -2031,6 +2034,7 @@ set_mcontext(struct thread *td, const mc
 		td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
 	}
 	td->td_pcb->pcb_flags |= PCB_FULLCTX;
+	td->td_pcb->pcb_full_iret = 1;
 	return (0);
 }
 

Modified: head/sys/amd64/amd64/sys_machdep.c
==============================================================================
--- head/sys/amd64/amd64/sys_machdep.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/sys_machdep.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -103,6 +103,7 @@ sysarch_ldt(struct thread *td, struct sy
 		error = amd64_get_ldt(td, largs);
 		break;
 	case I386_SET_LDT:
+		td->td_pcb->pcb_full_iret = 1;
 		if (largs->descs != NULL) {
 			lp = (struct user_segment_descriptor *)
 			    kmem_alloc(kernel_map, largs->num *
@@ -132,6 +133,7 @@ update_gdt_gsbase(struct thread *td, uin
 
 	if (td != curthread)
 		return;
+	td->td_pcb->pcb_full_iret = 1;
 	critical_enter();
 	sd = PCPU_GET(gs32p);
 	sd->sd_lobase = base & 0xffffff;
@@ -146,6 +148,7 @@ update_gdt_fsbase(struct thread *td, uin
 
 	if (td != curthread)
 		return;
+	td->td_pcb->pcb_full_iret = 1;
 	critical_enter();
 	sd = PCPU_GET(fs32p);
 	sd->sd_lobase = base & 0xffffff;
@@ -201,6 +204,7 @@ sysarch(td, uap)
 		if (!error) {
 			pcb->pcb_fsbase = i386base;
 			td->td_frame->tf_fs = _ufssel;
+			pcb->pcb_full_iret = 1;
 			update_gdt_fsbase(td, i386base);
 		}
 		break;
@@ -212,6 +216,7 @@ sysarch(td, uap)
 		error = copyin(uap->parms, &i386base, sizeof(i386base));
 		if (!error) {
 			pcb->pcb_gsbase = i386base;
+			pcb->pcb_full_iret = 1;
 			td->td_frame->tf_gs = _ugssel;
 			update_gdt_gsbase(td, i386base);
 		}
@@ -225,6 +230,7 @@ sysarch(td, uap)
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
 				pcb->pcb_fsbase = a64base;
+				pcb->pcb_full_iret = 1;
 				td->td_frame->tf_fs = _ufssel;
 			} else
 				error = EINVAL;
@@ -240,6 +246,7 @@ sysarch(td, uap)
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
 				pcb->pcb_gsbase = a64base;
+				pcb->pcb_full_iret = 1;
 				td->td_frame->tf_gs = _ugssel;
 			} else
 				error = EINVAL;
@@ -525,6 +532,7 @@ amd64_set_ldt(td, uap, descs)
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
+	td->td_pcb->pcb_full_iret = 1;
 	p = td->td_proc;
 	if (descs == NULL) {
 		/* Free descriptors */

Modified: head/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- head/sys/amd64/amd64/vm_machdep.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/amd64/vm_machdep.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -186,6 +186,9 @@ cpu_fork(td1, p2, td2, flags)
 	/* As an i386, do not copy io permission bitmap. */
 	pcb2->pcb_tssp = NULL;
 
+	/* New segment registers. */
+	pcb2->pcb_full_iret = 1;
+
 	/* Copy the LDT, if necessary. */
 	mdp1 = &td1->td_proc->p_md;
 	mdp2 = &p2->p_md;
@@ -336,6 +339,7 @@ cpu_set_upcall(struct thread *td, struct
 	 */
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
 	pcb2->pcb_flags &= ~PCB_FPUINITDONE;
+	pcb2->pcb_full_iret = 1;
 
 	/*
 	 * Create a new fresh stack for the new thread.
@@ -450,6 +454,7 @@ cpu_set_user_tls(struct thread *td, void
 	}
 #endif
 	td->td_pcb->pcb_fsbase = (register_t)tls_base;
+	td->td_pcb->pcb_full_iret = 1;
 	return (0);
 }
 

Modified: head/sys/amd64/ia32/ia32_exception.S
==============================================================================
--- head/sys/amd64/ia32/ia32_exception.S	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/ia32/ia32_exception.S	Thu Jul  9 09:34:11 2009	(r195486)
@@ -42,10 +42,16 @@
 	SUPERALIGN_TEXT
 IDTVEC(int0x80_syscall)
 	swapgs
-	sti
 	pushq	$2			/* sizeof "int 0x80" */
 	subq	$TF_ERR,%rsp		/* skip over tf_trapno */
 	movq	%rdi,TF_RDI(%rsp)
+	movq	PCPU(CURPCB),%rdi
+	movb	$0,PCB_FULL_IRET(%rdi)
+	movw	%fs,TF_FS(%rsp)
+	movw	%gs,TF_GS(%rsp)
+	movw	%es,TF_ES(%rsp)
+	movw	%ds,TF_DS(%rsp)
+	sti
 	movq	%rsi,TF_RSI(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
 	movq	%rcx,TF_RCX(%rsp)
@@ -60,10 +66,6 @@ IDTVEC(int0x80_syscall)
 	movq	%r13,TF_R13(%rsp)
 	movq	%r14,TF_R14(%rsp)
 	movq	%r15,TF_R15(%rsp)
-	movw	%fs,TF_FS(%rsp)
-	movw	%gs,TF_GS(%rsp)
-	movw	%es,TF_ES(%rsp)
-	movw	%ds,TF_DS(%rsp)
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi

Modified: head/sys/amd64/ia32/ia32_reg.c
==============================================================================
--- head/sys/amd64/ia32/ia32_reg.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/ia32/ia32_reg.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -125,6 +125,7 @@ set_regs32(struct thread *td, struct reg
 	tp->tf_fs = regs->r_fs;
 	tp->tf_es = regs->r_es;
 	tp->tf_ds = regs->r_ds;
+	td->td_pcb->pcb_full_iret = 1;
 	tp->tf_flags = TF_HASSEGS;
 	tp->tf_rdi = regs->r_edi;
 	tp->tf_rsi = regs->r_esi;

Modified: head/sys/amd64/ia32/ia32_signal.c
==============================================================================
--- head/sys/amd64/ia32/ia32_signal.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/ia32/ia32_signal.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -159,6 +159,7 @@ ia32_get_mcontext(struct thread *td, str
 	ia32_get_fpcontext(td, mcp);
 	mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
 	mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
+	td->td_pcb->pcb_full_iret = 1;
 	return (0);
 }
 
@@ -201,6 +202,7 @@ ia32_set_mcontext(struct thread *td, con
 	tp->tf_rsp = mcp->mc_esp;
 	tp->tf_ss = mcp->mc_ss;
 	td->td_pcb->pcb_flags |= PCB_FULLCTX;
+	td->td_pcb->pcb_full_iret = 1;
 	return (0);
 }
 
@@ -394,6 +396,7 @@ freebsd4_ia32_sendsig(sig_t catcher, ksi
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
+	td->td_pcb->pcb_full_iret = 1;
 	/* leave user %fs and %gs untouched */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
@@ -514,6 +517,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
+	td->td_pcb->pcb_full_iret = 1;
 	/* XXXKIB leave user %fs and %gs untouched */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
@@ -611,6 +615,7 @@ freebsd4_freebsd32_sigreturn(td, uap)
 	SIG_CANTMASK(td->td_sigmask);
 	signotify(td);
 	PROC_UNLOCK(p);
+	td->td_pcb->pcb_full_iret = 1;
 	return (EJUSTRETURN);
 }
 #endif	/* COMPAT_FREEBSD4 */
@@ -702,6 +707,7 @@ freebsd32_sigreturn(td, uap)
 	SIG_CANTMASK(td->td_sigmask);
 	signotify(td);
 	PROC_UNLOCK(p);
+	td->td_pcb->pcb_full_iret = 1;
 	return (EJUSTRETURN);
 }
 
@@ -747,5 +753,6 @@ ia32_setregs(td, entry, stack, ps_string
 	/* Return via doreti so that we can change to a different %cs */
 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
 	pcb->pcb_flags &= ~PCB_GS32BIT;
+	td->td_pcb->pcb_full_iret = 1;
 	td->td_retval[1] = 0;
 }

Modified: head/sys/amd64/include/pcb.h
==============================================================================
--- head/sys/amd64/include/pcb.h	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/include/pcb.h	Thu Jul  9 09:34:11 2009	(r195486)
@@ -72,12 +72,13 @@ struct pcb {
 	struct	savefpu	pcb_save;
 	uint16_t	pcb_initial_fpucw;
 
-	caddr_t	pcb_onfault;	/* copyin/out fault recovery */
+	caddr_t		pcb_onfault; /* copyin/out fault recovery */
 
 	/* 32-bit segment descriptor */
 	struct user_segment_descriptor	pcb_gs32sd;
 	/* local tss, with i/o bitmap; NULL for common */
 	struct amd64tss *pcb_tssp;
+	char		pcb_full_iret;
 };
 
 struct xpcb {

Modified: head/sys/amd64/linux32/linux32_sysvec.c
==============================================================================
--- head/sys/amd64/linux32/linux32_sysvec.c	Thu Jul  9 09:12:16 2009	(r195485)
+++ head/sys/amd64/linux32/linux32_sysvec.c	Thu Jul  9 09:34:11 2009	(r195486)
@@ -423,6 +423,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
+	td->td_pcb->pcb_full_iret = 1;
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
@@ -545,6 +546,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t 
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
+	td->td_pcb->pcb_full_iret = 1;
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
@@ -645,6 +647,7 @@ linux_sigreturn(struct thread *td, struc
 	regs->tf_rflags = eflags;
 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
 	regs->tf_ss     = frame.sf_sc.sc_ss;
+	td->td_pcb->pcb_full_iret = 1;
 
 	return (EJUSTRETURN);
 }
@@ -746,6 +749,7 @@ linux_rt_sigreturn(struct thread *td, st
 	regs->tf_rflags = eflags;
 	regs->tf_rsp    = context->sc_esp_at_signal;
 	regs->tf_ss     = context->sc_ss;
+	td->td_pcb->pcb_full_iret = 1;
 
 	/*
 	 * call sigaltstack & ignore results..
@@ -864,6 +868,7 @@ exec_linux_setregs(td, entry, stack, ps_
 	regs->tf_flags = TF_HASSEGS;
 	regs->tf_cs = _ucode32sel;
 	regs->tf_rbx = ps_strings;
+	td->td_pcb->pcb_full_iret = 1;
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 	fpstate_drop(td);
 


More information about the svn-src-all mailing list