svn commit: r330845 - in head/sys/powerpc: aim ofw powerpc

Nathan Whitehorn nwhitehorn at FreeBSD.org
Tue Mar 13 15:04:00 UTC 2018


Author: nwhitehorn
Date: Tue Mar 13 15:03:58 2018
New Revision: 330845
URL: https://svnweb.freebsd.org/changeset/base/330845

Log:
  Execute PowerPC64/AIM kernel from direct map region when possible.
  
  When the kernel can be in real mode in early boot, we can execute from
  high addresses aliased to the kernel's physical memory. If such a high
  address has its two most-significant bits set to 1 (0xc...), it falls
  automatically within the direct map. This reduces page-table pressure
  from the kernel and sets the kernel up for radix translation, which
  requires the kernel to run from this high region.
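
  As a minimal sketch of that aliasing (assuming the usual powerpc64
  layout in which DMAP_BASE_ADDRESS is 0xc000000000000000UL; the helper
  names below are illustrative, not the kernel's actual macros):

    /*
     * Illustrative only: a physical address is aliased into the direct
     * map by setting the two high-order bits, and recovered by clearing
     * them again, mirroring the "kernelstart & ~DMAP_BASE_ADDRESS"
     * arithmetic in the mmu_oea64.c hunk below.
     */
    #include <stdint.h>

    #define DMAP_BASE_ADDRESS	0xc000000000000000UL

    static inline uint64_t
    phys_to_dmap(uint64_t pa)
    {
            return (pa | DMAP_BASE_ADDRESS);
    }

    static inline uint64_t
    dmap_to_phys(uint64_t va)
    {
            return (va & ~DMAP_BASE_ADDRESS);
    }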
  
  This is accomplished by exploiting the fact that all PowerPC kernels are
  built as position-independent executables and relocate themselves at
  startup. Before this patch, the kernel ran at 1:1 VA:PA, but that VA/PA
  was arbitrary and set by the bootloader; very early, the kernel processes
  its ELF relocations so that it can operate wherever it happens to find
  itself. This patch uses that mechanism to re-enter and re-relocate the
  kernel a second time, with a new base address, set up in the early parts
  of powerpc_init().
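
  Condensed from the aim_early_init() hunk below, the re-entry step is
  roughly the following (a sketch, not the complete function):

    /*
     * If still in real mode (PSL_DR clear) and executing from the low
     * 1:1 alias, re-enter the kernel via __restartkernel(), which rfid's
     * back into __start at the new DMAP base so the ELF self-relocation
     * runs a second time against the high addresses.
     */
    if (!(mfmsr() & PSL_DR) &&
        (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
            __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
                DMAP_BASE_ADDRESS, mfmsr());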
  
  Reviewed by:	jhibbits
  Differential Revision:	D14647

Modified:
  head/sys/powerpc/aim/aim_machdep.c
  head/sys/powerpc/aim/locore64.S
  head/sys/powerpc/aim/mmu_oea64.c
  head/sys/powerpc/ofw/ofwcall64.S
  head/sys/powerpc/powerpc/machdep.c

Modified: head/sys/powerpc/aim/aim_machdep.c
==============================================================================
--- head/sys/powerpc/aim/aim_machdep.c	Tue Mar 13 15:02:46 2018	(r330844)
+++ head/sys/powerpc/aim/aim_machdep.c	Tue Mar 13 15:03:58 2018	(r330845)
@@ -160,15 +160,72 @@ extern void	*dlmisstrap, *dlmisssize;
 extern void	*dsmisstrap, *dsmisssize;
 
 extern void *ap_pcpu;
+extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
 
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 
 void
+aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
+    uint32_t mdp_cookie)
+{
+	register_t	scratch;
+
+	/*
+	 * If running from an FDT, make sure we are in real mode to avoid
+	 * tromping on firmware page tables. Everything in the kernel assumes
+	 * 1:1 mappings out of firmware, so this won't break anything not
+	 * already broken. This doesn't work if there is live OF, since OF
+	 * may internally use non-1:1 mappings.
+	 */
+	if (ofentry == 0)
+		mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+
+#ifdef __powerpc64__
+	/*
+	 * If in real mode, relocate to high memory so that the kernel
+	 * can execute from the direct map.
+	 */
+	if (!(mfmsr() & PSL_DR) &&
+	    (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
+		__restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+		    DMAP_BASE_ADDRESS, mfmsr());
+#endif
+
+	/* Various very early CPU fix ups */
+	switch (mfpvr() >> 16) {
+		/*
+		 * PowerPC 970 CPUs have a misfeature requested by Apple that
+		 * makes them pretend they have a 32-byte cacheline. Turn this
+		 * off before we measure the cacheline size.
+		 */
+		case IBM970:
+		case IBM970FX:
+		case IBM970MP:
+		case IBM970GX:
+			scratch = mfspr(SPR_HID5);
+			scratch &= ~HID5_970_DCBZ_SIZE_HI;
+			mtspr(SPR_HID5, scratch);
+			break;
+	#ifdef __powerpc64__
+		case IBMPOWER7:
+		case IBMPOWER7PLUS:
+		case IBMPOWER8:
+		case IBMPOWER8E:
+			/* XXX: get from ibm,slb-size in device tree */
+			n_slbs = 32;
+			break;
+	#endif
+	}
+}
+
+void
 aim_cpu_init(vm_offset_t toc)
 {
 	size_t		trap_offset, trapsize;
 	vm_offset_t	trap;
-	register_t	msr, scratch;
+	register_t	msr;
 	uint8_t		*cache_check;
 	int		cacheline_warn;
 	#ifndef __powerpc64__
@@ -198,32 +255,6 @@ aim_cpu_init(vm_offset_t toc)
 	 * Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64)
 	 */
 	psl_userstatic &= ~0x783f0000UL;
-
-	/* Various very early CPU fix ups */
-	switch (mfpvr() >> 16) {
-		/*
-		 * PowerPC 970 CPUs have a misfeature requested by Apple that
-		 * makes them pretend they have a 32-byte cacheline. Turn this
-		 * off before we measure the cacheline size.
-		 */
-		case IBM970:
-		case IBM970FX:
-		case IBM970MP:
-		case IBM970GX:
-			scratch = mfspr(SPR_HID5);
-			scratch &= ~HID5_970_DCBZ_SIZE_HI;
-			mtspr(SPR_HID5, scratch);
-			break;
-	#ifdef __powerpc64__
-		case IBMPOWER7:
-		case IBMPOWER7PLUS:
-		case IBMPOWER8:
-		case IBMPOWER8E:
-			/* XXX: get from ibm,slb-size in device tree */
-			n_slbs = 32;
-			break;
-	#endif
-	}
 
 	/*
 	 * Initialize the interrupt tables and figure out our cache line

Modified: head/sys/powerpc/aim/locore64.S
==============================================================================
--- head/sys/powerpc/aim/locore64.S	Tue Mar 13 15:02:46 2018	(r330844)
+++ head/sys/powerpc/aim/locore64.S	Tue Mar 13 15:03:58 2018	(r330845)
@@ -105,7 +105,6 @@ ap_kexec_start:		/* At 0x60 past start, copied to 0x60
 	mtsrr1	%r1
 	ba	EXC_RST
 
-
 /*
  * Now start the real text section
  */
@@ -149,9 +148,12 @@ ASENTRY_NOPROF(__start)
 	subf    %r31,%r31,%r2	/* Subtract from real TOC base to get base */
 
 	/* Set up the stack pointer */
-	ld	%r1,TOC_REF(tmpstk)(%r2)
-	addi	%r1,%r1,TMPSTKSZ-96
-	add	%r1,%r1,%r31
+	bl	1f
+	.llong	tmpstk + TMPSTKSZ - 96 - .
+1:	mflr	%r30
+	ld	%r1,0(%r30)
+	add	%r1,%r1,%r30
+	nop
 
 	/* Relocate kernel */
 	std	%r3,48(%r1)
@@ -188,5 +190,21 @@ ASENTRY_NOPROF(__start)
 
 	/* Unreachable */
 	b	.
+
+ASENTRY_NOPROF(__restartkernel)
+	/*
+	 * r3-r7: arguments to go to __start
+	 * r8: offset from current kernel address to apply
+	 * r9: MSR to set when (atomically) jumping to __start + r8
+	 */
+	mtsrr1	%r9
+	bl	1f
+1:	mflr	%r25
+	add	%r25,%r8,%r25
+	addi	%r25,%r25,2f-1b
+	mtsrr0	%r25
+	rfid
+2:	bl	__start
+	nop
 
 #include <powerpc/aim/trap_subr64.S>

Modified: head/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- head/sys/powerpc/aim/mmu_oea64.c	Tue Mar 13 15:02:46 2018	(r330844)
+++ head/sys/powerpc/aim/mmu_oea64.c	Tue Mar 13 15:03:58 2018	(r330845)
@@ -701,6 +701,7 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
 {
 	int		i, j;
 	vm_size_t	physsz, hwphyssz;
+	vm_paddr_t	kernelphysstart, kernelphysend;
 
 #ifndef __powerpc64__
 	/* We don't have a direct map since there is no BAT */
@@ -727,6 +728,9 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
 	__syncicache((void *)EXC_ISE, 0x80);
 #endif
 
+	kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
+	kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
+
 	/* Get physical memory regions from firmware */
 	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
 	CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
@@ -764,29 +768,30 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernels
 		if (phys_avail[j] < EXC_LAST)
 			phys_avail[j] += EXC_LAST;
 
-		if (kernelstart >= phys_avail[j] &&
-		    kernelstart < phys_avail[j+1]) {
-			if (kernelend < phys_avail[j+1]) {
+		if (kernelphysstart >= phys_avail[j] &&
+		    kernelphysstart < phys_avail[j+1]) {
+			if (kernelphysend < phys_avail[j+1]) {
 				phys_avail[2*phys_avail_count] =
-				    (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+				    (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
 				phys_avail[2*phys_avail_count + 1] =
 				    phys_avail[j+1];
 				phys_avail_count++;
 			}
 
-			phys_avail[j+1] = kernelstart & ~PAGE_MASK;
+			phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
 		}
 
-		if (kernelend >= phys_avail[j] &&
-		    kernelend < phys_avail[j+1]) {
-			if (kernelstart > phys_avail[j]) {
+		if (kernelphysend >= phys_avail[j] &&
+		    kernelphysend < phys_avail[j+1]) {
+			if (kernelphysstart > phys_avail[j]) {
 				phys_avail[2*phys_avail_count] = phys_avail[j];
 				phys_avail[2*phys_avail_count + 1] =
-				    kernelstart & ~PAGE_MASK;
+				    kernelphysstart & ~PAGE_MASK;
 				phys_avail_count++;
 			}
 
-			phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+			phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
+			    PAGE_SIZE;
 		}
 	}
 

Modified: head/sys/powerpc/ofw/ofwcall64.S
==============================================================================
--- head/sys/powerpc/ofw/ofwcall64.S	Tue Mar 13 15:02:46 2018	(r330844)
+++ head/sys/powerpc/ofw/ofwcall64.S	Tue Mar 13 15:03:58 2018	(r330845)
@@ -42,7 +42,7 @@
 ofwstk:
 	.space	OFWSTKSZ
 rtas_regsave:
-	.space	24 /* 3 * sizeof(register_t) */
+	.space	32 /* 4 * sizeof(register_t) */
 GLOBAL(ofmsr)
 	.llong  0, 0, 0, 0, 0		/* msr/sprg0-3 used in Open Firmware */
 GLOBAL(rtasmsr)
@@ -64,8 +64,8 @@ TOC_ENTRY(rtas_regsave)
  */
 
 ASENTRY_NOPROF(ofwcall)
-	mflr	%r0
-	std	%r0,16(%r1)
+	mflr	%r8
+	std	%r8,16(%r1)
 	stdu	%r1,-208(%r1)
 
 	/*
@@ -106,7 +106,7 @@ ASENTRY_NOPROF(ofwcall)
 
 	/* Get OF stack pointer */
 	ld	%r7,TOC_REF(ofwstk)(%r2)
-	addi	%r7,%r7,OFWSTKSZ-32
+	addi	%r7,%r7,OFWSTKSZ-40
 
 	/*
 	 * Set the MSR to the OF value. This has the side effect of disabling
@@ -129,6 +129,8 @@ ASENTRY_NOPROF(ofwcall)
 	std	%r5,8(%r1)	/* Save real stack pointer */
 	std	%r2,16(%r1)	/* Save old TOC */
 	std	%r6,24(%r1)	/* Save old MSR */
+	std	%r8,32(%r1)	/* Save high 32-bits of the kernel's PC */
+
 	li	%r5,0
 	stw	%r5,4(%r1)
 	stw	%r5,0(%r1)
@@ -137,15 +139,23 @@ ASENTRY_NOPROF(ofwcall)
 	mtctr	%r4
 	bctrl
 
-	/* Reload stack pointer and MSR from the OFW stack */
+	/* Reload stack pointer, MSR, and reference PC from the OFW stack */
+	ld	%r7,32(%r1)
 	ld	%r6,24(%r1)
 	ld	%r2,16(%r1)
 	ld	%r1,8(%r1)
 
-	/* Now set the real MSR */
-	mtmsrd	%r6
-	isync
+	/* Get back to the MSR/PC we want, using the cached high bits of PC */
+	mtsrr1	%r6
+	clrrdi	%r7,%r7,32
+	bl	1f
+1:	mflr	%r8
+	or	%r8,%r8,%r7
+	addi	%r8,%r8,2f-1b
+	mtsrr0	%r8
+	rfid			/* Turn on MMU, exceptions, and 64-bit mode */
 
+2:
 	/* Sign-extend the return value from OF */
 	extsw	%r3,%r3
 
@@ -186,8 +196,8 @@ ASENTRY_NOPROF(ofwcall)
  */
 
 ASENTRY_NOPROF(rtascall)
-	mflr	%r0
-	std	%r0,16(%r1)
+	mflr	%r9
+	std	%r9,16(%r1)
 	stdu	%r1,-208(%r1)
 
 	/*
@@ -248,24 +258,41 @@ ASENTRY_NOPROF(rtascall)
 	std	%r7,0(%r1)	/* Save 64-bit stack pointer */
 	std	%r2,8(%r1)	/* Save TOC */
 	std	%r6,16(%r1)	/* Save MSR */
+	std	%r9,24(%r1)	/* Save reference PC for high 32 bits */
 
 	/* Finally, branch to RTAS */
 	mtctr	%r5
 	bctrl
 
 	/* 
-	 * Reload stack pointer and MSR from the reg save area in r1. We are
-	 * running in 32-bit mode at this point, so it doesn't matter if r1
+	 * Reload stack pointer, MSR, reg PC from the reg save area in r1. We
+	 * are running in 32-bit mode at this point, so it doesn't matter if r1
 	 * has become sign-extended.
 	 */
+	ld	%r7,24(%r1)
 	ld	%r6,16(%r1)
 	ld	%r2,8(%r1)
 	ld	%r1,0(%r1)
 
-	/* Now set the real MSR */
-	mtmsrd	%r6
-	isync
+	/*
+	 * Get back to the right PC. We need to atomically re-enable
+	 * exceptions, 64-bit mode, and the MMU. One thing that has likely
+	 * happened is that, if we were running in the high-memory direct
+	 * map, we no longer are as a result of LR truncation in RTAS.
+	 * Fix this by copying the high-order bits of the LR at function
+	 * entry onto the current PC and then jumping there while flipping
+	 * all the MSR bits.
+	 */
+	mtsrr1	%r6
+	clrrdi	%r7,%r7,32
+	bl	1f
+1:	mflr	%r8
+	or	%r8,%r8,%r7
+	addi	%r8,%r8,2f-1b
+	mtsrr0	%r8
+	rfid			/* Turn on MMU, exceptions, and 64-bit mode */
 
+2:
 	/* Sign-extend the return value from RTAS */
 	extsw	%r3,%r3
 

Modified: head/sys/powerpc/powerpc/machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/machdep.c	Tue Mar 13 15:02:46 2018	(r330844)
+++ head/sys/powerpc/powerpc/machdep.c	Tue Mar 13 15:03:58 2018	(r330845)
@@ -237,6 +237,8 @@ extern unsigned char	__sbss_start[];
 extern unsigned char	__sbss_end[];
 extern unsigned char	_end[];
 
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 void booke_cpu_init(void);
 
@@ -247,7 +249,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
 	struct		pcpu *pc;
 	struct cpuref	bsp;
 	vm_offset_t	startkernel, endkernel;
-	void		*kmdp;
 	char		*env;
         bool		ofw_bootargs = false;
 #ifdef DDB
@@ -255,8 +256,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
 	vm_offset_t ksym_end;
 #endif
 
-	kmdp = NULL;
-
 	/* First guess at start/end kernel positions */
 	startkernel = __startkernel;
 	endkernel = __endkernel;
@@ -278,15 +277,7 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
 #endif
 
 #ifdef AIM
-	/*
-	 * If running from an FDT, make sure we are in real mode to avoid
-	 * tromping on firmware page tables. Everything in the kernel assumes
-	 * 1:1 mappings out of firmware, so this won't break anything not
-	 * already broken. This doesn't work if there is live OF, since OF
-	 * may internally use non-1:1 mappings.
-	 */
-	if (ofentry == 0)
-		mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+	aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
 #endif
 
 	/*
@@ -295,14 +286,33 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offs
 	 * boothowto.
 	 */
 	if (mdp != NULL) {
+		void *kmdp = NULL;
+		char *envp = NULL;
+		uintptr_t md_offset = 0;
+		vm_paddr_t kernelendphys;
+
+#ifdef AIM
+		if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
+			md_offset = DMAP_BASE_ADDRESS;
+#endif
+
 		preload_metadata = mdp;
+		if (md_offset > 0) {
+			preload_metadata += md_offset;
+			preload_bootstrap_relocate(md_offset);
+		}
 		kmdp = preload_search_by_type("elf kernel");
 		if (kmdp != NULL) {
 			boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
-			init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *),
-			    0);
-			endkernel = ulmax(endkernel, MD_FETCH(kmdp,
-			    MODINFOMD_KERNEND, vm_offset_t));
+			envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+			if (envp != NULL)
+				envp += md_offset;
+			init_static_kenv(envp, 0);
+			kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
+			    vm_offset_t);
+			if (kernelendphys != 0)
+				kernelendphys += md_offset;
+			endkernel = ulmax(endkernel, kernelendphys);
 #ifdef DDB
 			ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 			ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);

