svn commit: r326932 - in head/sys: i386/i386 x86/acpica x86/x86

Bruce Evans bde at FreeBSD.org
Mon Dec 18 13:53:23 UTC 2017


Author: bde
Date: Mon Dec 18 13:53:22 2017
New Revision: 326932
URL: https://svnweb.freebsd.org/changeset/base/326932

Log:
  Remove the permanent double mapping of low physical memory and replace
  it by a transient double mapping for the one instruction in ACPI wakeup
  where it is needed (and for many surrounding instructions in ACPI resume).
  Invalidate the TLB as soon as convenient after undoing the transient
  mapping.  ACPI resume already has the strict ordering needed for this.
  
  This fixes the non-trapping of null pointers and other garbage pointers
  below NBPDR (except transiently).  NBPDR is quite large (4MB, or 2MB for
  PAE).
  
  This fixes spurious traps at the first instruction in VM86 bioscalls.
  The traps are for transiently missing read permission in the first
  VM86 page (physical page 0) which was just written to at KERNBASE in
  the kernel.  The mechanism is unknown (it is not simply PG_G).
  
  locore uses a similar but larger transient double mapping and needs
  it for 2 instructions instead of 1.  Unmap the first PDE in it after
  the 2 instructions to detect most garbage pointers while bootstrapping.
  pmap_bootstrap() finishes the unmapping.
  
  Remove the avoidance of the double mapping for a recently fixed special
  case.  ACPI resume could use this avoidance (made non-special) to avoid
  any problems with the transient double mapping, but no such problems
  are known.
  
  Update comments in locore.  Many were for old versions of FreeBSD which
  tried to map low memory r/o except for special cases, or might have
  allowed access to low memory via physical offsets.  Now all kernel
  maps are r/w, and removal of the double map disallows use of physical
  offsets again.

Modified:
  head/sys/i386/i386/locore.s
  head/sys/x86/acpica/acpi_wakeup.c
  head/sys/x86/x86/mp_x86.c

Modified: head/sys/i386/i386/locore.s
==============================================================================
--- head/sys/i386/i386/locore.s	Mon Dec 18 11:57:05 2017	(r326931)
+++ head/sys/i386/i386/locore.s	Mon Dec 18 13:53:22 2017	(r326932)
@@ -241,22 +241,30 @@ NON_GPROF_ENTRY(btext)
 #if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	%eax, %cr3
-	movl	%cr4, %eax
-	orl	$CR4_PAE, %eax
-	movl	%eax, %cr4
+	movl	%cr4, %edx
+	orl	$CR4_PAE, %edx
+	movl	%edx, %cr4
 #else
 	movl	R(IdlePTD), %eax
 	movl	%eax,%cr3		/* load ptd addr into mmu */
 #endif
-	movl	%cr0,%eax		/* get control word */
-	orl	$CR0_PE|CR0_PG,%eax	/* enable paging */
-	movl	%eax,%cr0		/* and let's page NOW! */
+	movl	%cr0,%edx		/* get control word */
+	orl	$CR0_PE|CR0_PG,%edx	/* enable paging */
+	movl	%edx,%cr0		/* and let's page NOW! */
 
 	pushl	$begin			/* jump to high virtualized address */
 	ret
 
-/* now running relocated at KERNBASE where the system is linked to run */
 begin:
+	/*
+	 * Now running relocated at KERNBASE where the system is linked to run.
+	 *
+	 * Remove the lowest part of the double mapping of low memory to get
+	 * some null pointer checks.
+	 */
+	movl	$0,PTD
+	movl	%eax,%cr3		/* invalidate TLB */
+
 	/* set up bootstrap stack */
 	movl	proc0kstack,%eax	/* location of in-kernel stack */
 
@@ -725,14 +733,15 @@ no_kernend:
 
 /*
  * Initialize page table pages mapping physical address zero through the
- * end of the kernel.  All of the page table entries allow read and write
- * access.  Write access to the first physical page is required by bios32
- * calls, and write access to the first 1 MB of physical memory is required
- * by ACPI for implementing suspend and resume.  We do this even
- * if we've enabled PSE above, we'll just switch the corresponding kernel
- * PDEs before we turn on paging.
+ * (physical) end of the kernel.  Many of these pages must be reserved,
+ * and we reserve them all and map them linearly for convenience.  We do
+ * this even if we've enabled PSE above; we'll just switch the corresponding
+ * kernel PDEs before we turn on paging.
  *
  * XXX: We waste some pages here in the PSE case!
+ *
+ * This and all other page table entries allow read and write access for
+ * various reasons.  Kernel mappings never have any access restrictions.
  */
 	xorl	%eax, %eax
 	movl	R(KERNend),%ecx
@@ -784,42 +793,21 @@ no_kernend:
 
 /*
  * Create an identity mapping for low physical memory, including the kernel.
- * The part of this mapping given by the first PDE (for the first 4 MB or 2
- * MB of physical memory)
- * becomes a permanent part of the kernel's address space.  The rest of this
- * mapping is destroyed in pmap_bootstrap().  Ordinarily, the same page table
- * pages are shared by the identity mapping and the kernel's native mapping.
- * However, the permanent identity mapping cannot contain PG_G mappings.
- * Thus, if the (physical) kernel overlaps the permanent identity mapping
- * (and PG_G is enabled), the
- * page table for the first PDE must be duplicated and not shared.  
+ * This is only used to map the 2 instructions for jumping to 'begin' in
+ * locore (we map everything to avoid having to determine where these
+ * instructions are).  ACPI resume will transiently restore the first PDE in
+ * this mapping (and depend on this PDE's page table created here not being
+ * destroyed).  See pmap_bootstrap() for more details.
  *
- * N.B. Due to errata concerning large pages and physical address zero,
- * a PG_PS mapping is not used.
+ * Note:  There are errata concerning large pages and physical address zero,
+ * so a PG_PS mapping should not be used for PDE 0.  Our double mapping
+ * avoids this automatically by not using PG_PS for PDE #KPDI so that PAT
+ * bits can be set at the page level for i/o pages below 1 MB.
  */
 	movl	R(KPTphys), %eax
 	xorl	%ebx, %ebx
 	movl	$NKPT, %ecx
 	fillkpt(R(IdlePTD), $PG_RW)
-#if KERNLOAD < (1 << PDRSHIFT)
-	testl	$PG_G, R(pgeflag)
-	jz	1f
-	ALLOCPAGES(1)
-	movl	%esi, %eax
-	movl	$1, %ecx
-	fillkptphys($PG_RW)		/* map the new page table in std map */
-	movl	%esi, %edi
-	movl	R(IdlePTD), %eax
-	movl	(%eax), %esi		/* top bits are 0 for PAE */
-	andl	$~PAGE_MASK, %esi
-	movl	%edi, (%eax)
-	orl	$PG_V | PG_RW, (%eax)	/* finish writing new PTD[0] */
-	movl	$PAGE_SIZE, %ecx
-	cld
-	rep
-	movsb
-1:	
-#endif
 
 /*
  * Install PDEs for PTs covering enough kva to bootstrap.  Then for the PSE

Modified: head/sys/x86/acpica/acpi_wakeup.c
==============================================================================
--- head/sys/x86/acpica/acpi_wakeup.c	Mon Dec 18 11:57:05 2017	(r326931)
+++ head/sys/x86/acpica/acpi_wakeup.c	Mon Dec 18 13:53:22 2017	(r326932)
@@ -179,6 +179,17 @@ acpi_wakeup_cpus(struct acpi_softc *sc)
 		}
 	}
 
+#ifdef __i386__
+	/*
+	 * Remove the identity mapping of low memory for all CPUs and sync
+	 * the TLB for the BSP.  The APs are now spinning in
+	 * cpususpend_handler() and we will release them soon.  Then each
+	 * will invalidate its TLB.
+	 */
+	kernel_pmap->pm_pdir[0] = 0;
+	invltlb_glob();
+#endif
+
 	/* restore the warmstart vector */
 	*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
 
@@ -234,6 +245,19 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
 		WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb);
 		WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
 		WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);
+
+#ifdef __i386__
+		/*
+		 * Map some low memory with virt == phys for ACPI wakecode
+		 * to use to jump to high memory after enabling paging. This
+		 * is the same as for similar jump in locore, except the
+		 * jump is a single instruction, and we know its address
+		 * more precisely so only need a single PTD, and we have to
+		 * be careful to use the kernel map (PTD[0] is for curthread
+		 * which may be a user thread in deprecated APIs).
+		 */
+		kernel_pmap->pm_pdir[0] = PTD[KPTDI];
+#endif
 
 		/* Call ACPICA to enter the desired sleep state */
 		if (state == ACPI_STATE_S4 && sc->acpi_s4bios)

Modified: head/sys/x86/x86/mp_x86.c
==============================================================================
--- head/sys/x86/x86/mp_x86.c	Mon Dec 18 11:57:05 2017	(r326931)
+++ head/sys/x86/x86/mp_x86.c	Mon Dec 18 13:53:22 2017	(r326932)
@@ -1398,6 +1398,11 @@ cpususpend_handler(void)
 	while (!CPU_ISSET(cpu, &started_cpus))
 		ia32_pause();
 
+#ifdef __i386__
+	/* Finish removing the identity mapping of low memory for this AP. */
+	invltlb_glob();
+#endif
+
 	if (cpu_ops.cpu_resume)
 		cpu_ops.cpu_resume();
 #ifdef __amd64__


More information about the svn-src-head mailing list