svn commit: r354095 - in head/sys: amd64/amd64 amd64/include kern

Konstantin Belousov kib at FreeBSD.org
Fri Oct 25 20:09:45 UTC 2019


Author: kib
Date: Fri Oct 25 20:09:42 2019
New Revision: 354095
URL: https://svnweb.freebsd.org/changeset/base/354095

Log:
  amd64: move pcb out of kstack to struct thread.
  
  This saves 320 bytes of the precious stack space.
  
  The only negative aspects of the change I can think of are that
  struct thread obviously grows by 320 bytes, and that those 320 bytes
  are no longer swapped out.  I believe the freed stack space is much
  more important than that.  Also, the current struct thread size is
  1392 bytes on amd64, so UMA allocates two thread structures per (4KB)
  slab, which leaves room for the pcb without increasing zone memory use.
  
  Reviewed by:	alc, markj
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks
  Differential revision:	https://reviews.freebsd.org/D22138

Modified:
  head/sys/amd64/amd64/cpu_switch.S
  head/sys/amd64/amd64/genassym.c
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/vm_machdep.c
  head/sys/amd64/include/md_var.h
  head/sys/amd64/include/pcpu_aux.h
  head/sys/amd64/include/proc.h
  head/sys/kern/kern_thread.c

Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/amd64/cpu_switch.S	Fri Oct 25 20:09:42 2019	(r354095)
@@ -74,7 +74,7 @@ END(cpu_throw)
  */
 ENTRY(cpu_switch)
 	/* Switch to new thread.  First, save context. */
-	movq	TD_PCB(%rdi),%r8
+	leaq	TD_MD_PCB(%rdi),%r8
 
 	movq	(%rsp),%rax			/* Hardware registers */
 	movq	%r15,PCB_R15(%r8)
@@ -140,7 +140,7 @@ ctx_switch_xsave:
 	callq	pmap_activate_sw
 	movq	%r15,TD_LOCK(%r13)		/* Release the old thread */
 sw1:
-	movq	TD_PCB(%r12),%r8
+	leaq	TD_MD_PCB(%r12),%r8
 #if defined(SCHED_ULE) && defined(SMP)
 	movq	$blocked_lock, %rdx
 	movq	TD_LOCK(%r12),%rcx
@@ -193,11 +193,12 @@ do_kthread:
 	cmpq	%rax,%rdx
 	jne	do_tss
 done_tss:
-	movq	%r8,PCPU(RSP0)
+	movq	TD_MD_STACK_BASE(%r12),%r9
+	movq	%r9,PCPU(RSP0)
 	movq	%r8,PCPU(CURPCB)
 	movq	PCPU(PTI_RSP0),%rax
 	cmpq	$~0,PCPU(UCR3)
-	cmove	%r8,%rax
+	cmove	%r9,%rax
 	movq	%rax,TSS_RSP0(%rdx)
 	movq	%r12,PCPU(CURTHREAD)		/* into next thread */
 

Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/amd64/genassym.c	Fri Oct 25 20:09:42 2019	(r354095)
@@ -87,6 +87,8 @@ ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
 ASSYM(TD_MD, offsetof(struct thread, td_md));
+ASSYM(TD_MD_PCB, offsetof(struct thread, td_md.md_pcb));
+ASSYM(TD_MD_STACK_BASE, offsetof(struct thread, td_md.md_stack_base));
 
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/amd64/machdep.c	Fri Oct 25 20:09:42 2019	(r354095)
@@ -1789,12 +1789,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	amd64_conf_fast_syscall();
 
 	/*
-	 * Temporary forge some valid pointer to PCB, for exception
-	 * handlers.  It is reinitialized properly below after FPU is
-	 * set up.  Also set up td_critnest to short-cut the page
-	 * fault handler.
+	 * We initialize the PCB pointer early so that exception
+	 * handlers will work.  Also set up td_critnest to short-cut
+	 * the page fault handler.
 	 */
 	cpu_max_ext_state_size = sizeof(struct savefpu);
+	set_top_of_stack_td(&thread0);
 	thread0.td_pcb = get_pcb_td(&thread0);
 	thread0.td_critnest = 1;
 
@@ -1850,11 +1850,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	fpuinit();
 
 	/*
-	 * Set up thread0 pcb after fpuinit calculated pcb + fpu save
+	 * Set up thread0 pcb save area after fpuinit calculated fpu save
 	 * area size.  Zero out the extended state header in fpu save
 	 * area.
 	 */
-	thread0.td_pcb = get_pcb_td(&thread0);
 	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
 	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
 	if (use_xsave) {
@@ -1863,7 +1862,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 		xhdr->xstate_bv = xsave_mask;
 	}
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
-	rsp0 = (vm_offset_t)thread0.td_pcb;
+	rsp0 = thread0.td_md.md_stack_base;
 	/* Ensure the stack is aligned to 16 bytes */
 	rsp0 &= ~0xFul;
 	common_tss[0].tss_rsp0 = rsp0;
@@ -1899,7 +1898,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	TSEXIT();
 
 	/* Location of kernel stack for locore */
-	return ((u_int64_t)thread0.td_pcb);
+	return (thread0.td_md.md_stack_base);
 }
 
 void

Modified: head/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- head/sys/amd64/amd64/vm_machdep.c	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/amd64/vm_machdep.c	Fri Oct 25 20:09:42 2019	(r354095)
@@ -87,37 +87,41 @@ __FBSDID("$FreeBSD$");
 _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
     "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf.");
 
-struct savefpu *
-get_pcb_user_save_td(struct thread *td)
+void
+set_top_of_stack_td(struct thread *td)
 {
-	vm_offset_t p;
-
-	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+	td->td_md.md_stack_base = td->td_kstack +
+	    td->td_kstack_pages * PAGE_SIZE -
 	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
-	KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area"));
-	return ((struct savefpu *)p);
 }
 
 struct savefpu *
-get_pcb_user_save_pcb(struct pcb *pcb)
+get_pcb_user_save_td(struct thread *td)
 {
 	vm_offset_t p;
 
-	p = (vm_offset_t)(pcb + 1);
+	p = td->td_md.md_stack_base;
+	KASSERT((p % XSAVE_AREA_ALIGN) == 0,
+	    ("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
 	return ((struct savefpu *)p);
 }
 
 struct pcb *
 get_pcb_td(struct thread *td)
 {
-	vm_offset_t p;
 
-	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
-	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) -
-	    sizeof(struct pcb);
-	return ((struct pcb *)p);
+	return (&td->td_md.md_pcb);
 }
 
+struct savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{
+	struct thread *td;
+
+	td = __containerof(pcb, struct thread, td_md.md_pcb);
+	return (get_pcb_user_save_td(td));
+}
+
 void *
 alloc_fpusave(int flags)
 {
@@ -165,9 +169,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t
 	fpuexit(td1);
 	update_pcb_bases(td1->td_pcb);
 
-	/* Point the pcb to the top of the stack */
-	pcb2 = get_pcb_td(td2);
-	td2->td_pcb = pcb2;
+	/* Point the stack and pcb to the actual location */
+	set_top_of_stack_td(td2);
+	td2->td_pcb = pcb2 = get_pcb_td(td2);
 
 	/* Copy td1's pcb */
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
@@ -186,7 +190,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 */
-	td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
+	td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1;
 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
 
 	td2->td_frame->tf_rax = 0;		/* Child returns zero */
@@ -351,8 +355,9 @@ cpu_thread_alloc(struct thread *td)
 	struct pcb *pcb;
 	struct xstate_hdr *xhdr;
 
+	set_top_of_stack_td(td);
 	td->td_pcb = pcb = get_pcb_td(td);
-	td->td_frame = (struct trapframe *)pcb - 1;
+	td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
 	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
 	if (use_xsave) {
 		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
@@ -490,7 +495,6 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 	struct pcb *pcb2;
 
-	/* Point the pcb to the top of the stack. */
 	pcb2 = td->td_pcb;
 
 	/*

Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/include/md_var.h	Fri Oct 25 20:09:42 2019	(r354095)
@@ -83,6 +83,7 @@ void	fpstate_drop(struct thread *td);
 void	pagezero(void *addr);
 void	setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist);
 void	sse2_pagezero(void *addr);
+void	set_top_of_stack_td(struct thread *td);
 struct savefpu *get_pcb_user_save_td(struct thread *td);
 struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
 void	pci_early_quirks(void);

Modified: head/sys/amd64/include/pcpu_aux.h
==============================================================================
--- head/sys/amd64/include/pcpu_aux.h	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/include/pcpu_aux.h	Fri Oct 25 20:09:42 2019	(r354095)
@@ -57,16 +57,6 @@ __curthread(void)
 	return (td);
 }
 #define	curthread		(__curthread())
-
-static __inline __pure2 struct pcb *
-__curpcb(void)
-{
-	struct pcb *pcb;
-
-	__asm("movq %%gs:%P1,%0" : "=r" (pcb) : "n" (offsetof(struct pcpu,
-	    pc_curpcb)));
-	return (pcb);
-}
-#define	curpcb		(__curpcb())
+#define	curpcb			(&curthread->td_md.md_pcb)
 
 #endif	/* _MACHINE_PCPU_AUX_H_ */

Modified: head/sys/amd64/include/proc.h
==============================================================================
--- head/sys/amd64/include/proc.h	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/amd64/include/proc.h	Fri Oct 25 20:09:42 2019	(r354095)
@@ -36,6 +36,7 @@
 #define	_MACHINE_PROC_H_
 
 #include <sys/queue.h>
+#include <machine/pcb.h>
 #include <machine/segments.h>
 
 /*
@@ -72,6 +73,8 @@ struct mdthread {
 	struct pmap_invl_gen md_invl_gen;
 	register_t md_efirt_tmp;	/* (k) */
 	int	md_efirt_dis_pf;	/* (k) */
+	struct pcb md_pcb;
+	vm_offset_t md_stack_base;
 };
 
 struct mdproc {

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Fri Oct 25 19:38:02 2019	(r354094)
+++ head/sys/kern/kern_thread.c	Fri Oct 25 20:09:42 2019	(r354095)
@@ -84,7 +84,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0
     "struct thread KBI td_pflags");
 _Static_assert(offsetof(struct thread, td_frame) == 0x478,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x540,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x690,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb0,
     "struct proc KBI p_flag");


More information about the svn-src-head mailing list