svn commit: r305692 - in head: share/man/man9 sys/amd64/amd64 sys/amd64/include

Sun Sep 11 09:14:08 UTC 2016

Author: kib
Date: Sun Sep 11 09:14:07 2016
New Revision: 305692
URL: https://svnweb.freebsd.org/changeset/base/305692

Log:
  Add FPU_KERN_NOCTX flag to the fpu_kern_enter() function on amd64.
  
  The flag specifies that the block which uses the FPU must be executed in
  a critical section, i.e. take no context switches, and does not need an
  FPU save area during the execution.
  
  It is intended to be applied around fast and short code paths where
  save area allocation is impossible or undesirable, due to context or
  due to the relative cost of calculation vs. allocation.
  
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks

Modified:
  head/share/man/man9/fpu_kern.9
  head/sys/amd64/amd64/fpu.c
  head/sys/amd64/include/fpu.h
  head/sys/amd64/include/pcb.h

Modified: head/share/man/man9/fpu_kern.9
==============================================================================

--- head/share/man/man9/fpu_kern.9	Sun Sep 11 07:24:12 2016	(r305691)
+++ head/share/man/man9/fpu_kern.9	Sun Sep 11 09:14:07 2016	(r305692)
@@ -120,6 +120,16 @@ could be used from both kernel thread an
 The
 .Fn fpu_kern_leave
 function correctly handles such contexts.
+.It Dv FPU_KERN_NOCTX
+Avoid nesting save area.
+If the flag is specified, the
+.Fa ctx
+must be passed as
+.Va NULL .
+The flag should only be used for really short code blocks
+which can be executed in a critical section.
+It avoids the need to allocate the FPU context at the cost
+of increased system latency.
 .El
 .El
 .Pp

Modified: head/sys/amd64/amd64/fpu.c
==============================================================================
--- head/sys/amd64/amd64/fpu.c	Sun Sep 11 07:24:12 2016	(r305691)
+++ head/sys/amd64/amd64/fpu.c	Sun Sep 11 09:14:07 2016	(r305692)
@@ -633,6 +633,8 @@ fpudna(void)
 	 */
 	critical_enter();
 
+	KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
+	    ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
 	if (PCPU_GET(fpcurthread) == curthread) {
 		printf("fpudna: fpcurthread == curthread\n");
 		stop_emulating();
@@ -964,13 +966,39 @@ fpu_kern_enter(struct thread *td, struct
 {
 	struct pcb *pcb;
 
-	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx"));
+	pcb = td->td_pcb;
+	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
+	    ("ctx is required when !FPU_KERN_NOCTX"));
+	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
+	    ("using inuse ctx"));
+	KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0,
+	    ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state"));
 
+	if ((flags & FPU_KERN_NOCTX) != 0) {
+		critical_enter();
+		stop_emulating();
+		if (curthread == PCPU_GET(fpcurthread)) {
+			fpusave(curpcb->pcb_save);
+			PCPU_SET(fpcurthread, NULL);
+		} else {
+			KASSERT(PCPU_GET(fpcurthread) == NULL,
+			    ("invalid fpcurthread"));
+		}
+
+		/*
+		 * This breaks XSAVEOPT tracker, but
+		 * PCB_FPUNOSAVE state is supposed to never need to
+		 * save FPU context at all.
+		 */
+		fpurestore(fpu_initialstate);
+		set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE |
+		    PCB_FPUINITDONE);
+		return (0);
+	}
 	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
 		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
 		return (0);
 	}
-	pcb = td->td_pcb;
 	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
 	    get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
 	ctx->flags = FPU_KERN_CTX_INUSE;
@@ -989,19 +1017,34 @@ fpu_kern_leave(struct thread *td, struct
 {
 	struct pcb *pcb;
 
-	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
-	    ("leaving not inuse ctx"));
-	ctx->flags &= ~FPU_KERN_CTX_INUSE;
-
-	if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
-		return (0);
-	KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
 	pcb = td->td_pcb;
-	critical_enter();
-	if (curthread == PCPU_GET(fpcurthread))
-		fpudrop();
-	critical_exit();
-	pcb->pcb_save = ctx->prev;
+
+	if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) {
+		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
+		KASSERT(PCPU_GET(fpcurthread) == NULL,
+		    ("non-NULL fpcurthread for PCB_FPUNOSAVE"));
+		CRITICAL_ASSERT(td);
+
+		clear_pcb_flags(pcb,  PCB_FPUNOSAVE | PCB_FPUINITDONE);
+		start_emulating();
+		critical_exit();
+	} else {
+		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
+		    ("leaving not inuse ctx"));
+		ctx->flags &= ~FPU_KERN_CTX_INUSE;
+
+		if (is_fpu_kern_thread(0) &&
+		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
+			return (0);
+		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0,
+		    ("dummy ctx"));
+		critical_enter();
+		if (curthread == PCPU_GET(fpcurthread))
+			fpudrop();
+		critical_exit();
+		pcb->pcb_save = ctx->prev;
+	}
+
 	if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
 		if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
 			set_pcb_flags(pcb, PCB_FPUINITDONE);

Modified: head/sys/amd64/include/fpu.h
==============================================================================
--- head/sys/amd64/include/fpu.h	Sun Sep 11 07:24:12 2016	(r305691)
+++ head/sys/amd64/include/fpu.h	Sun Sep 11 09:14:07 2016	(r305692)
@@ -86,6 +86,7 @@ void	fpu_save_area_reset(struct savefpu 
 #define	FPU_KERN_NORMAL	0x0000
 #define	FPU_KERN_NOWAIT	0x0001
 #define	FPU_KERN_KTHR	0x0002
+#define	FPU_KERN_NOCTX	0x0004
 
 #endif
 

Modified: head/sys/amd64/include/pcb.h
==============================================================================
--- head/sys/amd64/include/pcb.h	Sun Sep 11 07:24:12 2016	(r305691)
+++ head/sys/amd64/include/pcb.h	Sun Sep 11 09:14:07 2016	(r305692)
@@ -83,6 +83,7 @@ struct pcb {
 #define	PCB_FPUINITDONE	0x08	/* fpu state is initialized */
 #define	PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */
 #define	PCB_32BIT	0x40	/* process has 32 bit context (segs etc) */
+#define	PCB_FPUNOSAVE	0x80	/* no save area for current FPU ctx */
 
 	uint16_t	pcb_initial_fpucw;