svn commit: r298237 - in head/sys: dev/dpaa powerpc/booke powerpc/include powerpc/mpc85xx powerpc/powerpc

Justin Hibbits <jhibbits@FreeBSD.org>
Tue Apr 19 01:48:20 UTC 2016


Author: jhibbits
Date: Tue Apr 19 01:48:18 2016
New Revision: 298237
URL: https://svnweb.freebsd.org/changeset/base/298237

Log:
  Fix SMP booting for PowerPC Book-E
  
  Summary:
  PowerPC Book-E SMP is currently broken for unknown reasons.  Pull in the
  Semihalf changes made circa 2012 for e500mc/e5500, which enable SMP booting.
  
  This eliminates the shared software TLB1 table, replacing it with a
  tlb1_read_entry() function that reads entries back from the hardware.
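  
  Instead of consulting an in-RAM shadow copy, callers now read the
  hardware entry on demand.  A minimal sketch of the recurring
  before/after pattern (condensed from the mmu_booke_kextract() hunk
  below; not the verbatim committed code):
  
  	/* Old: consult the in-RAM shadow table. */
  	if (tlb1[i].mas1 & MAS1_VALID)
  		...
  
  	/* New: read slot i back from the TLB1 hardware each time. */
  	tlb_entry_t e;
  
  	tlb1_read_entry(&e, i);		/* MAS0 <- TLB1[i]; isync; tlbre */
  	if (e.mas1 & MAS1_VALID)
  		...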
  
  This does not yet support ePAPR SMP booting, and does not handle resetting
  CPUs that have already been released (an ePAPR boot releases APs into a spin
  loop waiting on a specific address).  This will be addressed in the near
  future by using the MPIC to reset each AP to our own alternate boot address.
  
  This also includes a change to the dpaa/dtsec(4) driver to mark the portals
  as CPU-private.
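  
  Condensed, the AP bring-up now works as follows (a sketch of the call
  flow assembled from the hunks below; names as they appear in the diff):
  
  	BSP: cpu_mp_unleash()
  	       tlb1_ap_prep();           /* snapshot valid TLB1 entries
  	                                    marked _TLB_ENTRY_SHARED into
  	                                    __boot_tlb1[] */
  	       mpc85xx_smp_start_cpu();  /* bp_kernload = kernload; then
  	                                    release the AP from hold-off */
  	AP:  __boot_page (locore.S)      /* map 64 MB at KERNBASE from
  	                                    bp_kernload via TLB1[0] */
  	       pmap_bootstrap_ap();      /* replay __boot_tlb1[1..] into
  	                                    the local TLB1 hardware */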
  
  Test Plan:
  Tested on Amiga X5000/20 (P5020).  Boots and prints the following
  messages:
  
   Adding CPU 0, pir=0, awake=1
   Waking up CPU 1 (dev=1)
   Adding CPU 1, pir=20, awake=1
   SMP: AP CPU #1 launched
  
  top(1) shows CPU1 active.
  
  Obtained from:	Semihalf
  Relnotes:	Yes
  Differential Revision: https://reviews.freebsd.org/D5945

Modified:
  head/sys/dev/dpaa/portals_common.c
  head/sys/powerpc/booke/locore.S
  head/sys/powerpc/booke/pmap.c
  head/sys/powerpc/include/tlb.h
  head/sys/powerpc/mpc85xx/platform_mpc85xx.c
  head/sys/powerpc/powerpc/genassym.c
  head/sys/powerpc/powerpc/mp_machdep.c

Modified: head/sys/dev/dpaa/portals_common.c
==============================================================================
--- head/sys/dev/dpaa/portals_common.c	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/dev/dpaa/portals_common.c	Tue Apr 19 01:48:18 2016	(r298237)
@@ -75,8 +75,6 @@ dpaa_portal_alloc_res(device_t dev, stru
 		sc->sc_rres[0] = bus_alloc_resource(dev,
 		    SYS_RES_MEMORY, &sc->sc_rrid[0], rle->start + sc->sc_dp_pa,
 		    rle->end + sc->sc_dp_pa, rle->count, RF_ACTIVE);
-		pmap_change_attr((vm_offset_t)rman_get_bushandle(sc->sc_rres[0]),
-		    rle->count, VM_MEMATTR_CACHEABLE);
 		if (sc->sc_rres[0] == NULL) {
 			device_printf(dev, "Could not allocate memory.\n");
 			return (ENXIO);

Modified: head/sys/powerpc/booke/locore.S
==============================================================================
--- head/sys/powerpc/booke/locore.S	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/booke/locore.S	Tue Apr 19 01:48:18 2016	(r298237)
@@ -104,6 +104,10 @@ __start:
 	mtmsr	%r3
 	isync
 
+/*
+ * Initial HIDs configuration
+ */
+1:
 	mfpvr	%r3
 	rlwinm	%r3, %r3, 16, 16, 31
 
@@ -161,7 +165,6 @@ __start:
 /*
  * Create temporary mapping in AS=1 and switch to it
  */
-	addi	%r3, %r29, 1
 	bl	tlb1_temp_mapping_as1
 
 	mfmsr	%r3
@@ -197,7 +200,7 @@ __start:
 	lis	%r3, KERNBASE@h
 	ori	%r3, %r3, KERNBASE@l	/* EPN = KERNBASE */
 #ifdef SMP
-	ori	%r3, %r3, MAS2_M at l	/* WIMGE = 0b00100 */
+	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
 #endif
 	mtspr	SPR_MAS2, %r3
 	isync
@@ -295,21 +298,19 @@ done_mapping:
 __boot_page:
 	bl	1f
 
-	.globl	bp_ntlb1s
-bp_ntlb1s:
+	.globl	bp_trace
+bp_trace:
 	.long	0
 
-	.globl	bp_tlb1
-bp_tlb1:
-	.space	4 * 3 * 64
-
-	.globl	bp_tlb1_end
-bp_tlb1_end:
+	.globl	bp_kernload
+bp_kernload:
+	.long	0
 
 /*
  * Initial configuration
  */
-1:	mflr	%r31		/* r31 hold the address of bp_ntlb1s */
+1:
+	mflr	%r31		/* r31 holds the address of bp_trace */
 
 	/* Set HIDs */
 	mfpvr	%r3
@@ -332,20 +333,7 @@ bp_tlb1_end:
 3:
 	mtspr	SPR_HID0, %r4
 	isync
-/*
- * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
- * this core.
- */
-	cmpli	0, 0, %r3, FSL_E500mc
-	beq	1f
-	cmpli	0, 0, %r3, FSL_E5500
-	beq	1f
 
-	lis	%r3, HID1_E500_DEFAULT_SET@h
-	ori	%r3, %r3, HID1_E500_DEFAULT_SET@l
-	mtspr	SPR_HID1, %r3
-	isync
-1:
 	/* Enable branch prediction */
 	li	%r3, BUCSR_BPEN
 	mtspr	SPR_BUCSR, %r3
@@ -367,7 +355,7 @@ bp_tlb1_end:
 /*
  * Create temporary translation in AS=1 and switch to it
  */
-	lwz	%r3, 0(%r31)
+
 	bl	tlb1_temp_mapping_as1
 
 	mfmsr	%r3
@@ -388,39 +376,46 @@ bp_tlb1_end:
 /*
  * Setup final mapping in TLB1[1] and switch to it
  */
-	lwz	%r6, 0(%r31)
-	addi	%r5, %r31, 4
-	li	%r4, 0
-
-4:	lis	%r3, MAS0_TLBSEL1@h
-	rlwimi	%r3, %r4, 16, 12, 15
+	/* Final kernel mapping, map in 64 MB of RAM */
+	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
+	li	%r4, 0			/* Entry 0 */
+	rlwimi	%r3, %r4, 16, 4, 15
 	mtspr	SPR_MAS0, %r3
 	isync
-	lwz	%r3, 0(%r5)
-	mtspr	SPR_MAS1, %r3
+
+	li	%r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l
+	oris	%r3, %r3, (MAS1_VALID | MAS1_IPROT)@h
+	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
 	isync
-	lwz	%r3, 4(%r5)
+
+	lis	%r3, KERNBASE@h
+	ori	%r3, %r3, KERNBASE@l	/* EPN = KERNBASE */
+	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
 	mtspr	SPR_MAS2, %r3
 	isync
-	lwz	%r3, 8(%r5)
+
+	/* Retrieve kernel load [physical] address from bp_kernload */
+	bl	4f
+	.long	bp_kernload
+	.long	__boot_page
+4:	mflr	%r3
+	lwz	%r4, 0(%r3)
+	lwz	%r5, 4(%r3)
+	rlwinm	%r3, %r3, 0, 0, 19
+	sub	%r4, %r4, %r5	/* offset of bp_kernload within __boot_page */
+	lwzx	%r3, %r4, %r3
+
+	/* Set RPN and protection */
+	ori	%r3, %r3, (MAS3_SX | MAS3_SW | MAS3_SR)@l
 	mtspr	SPR_MAS3, %r3
 	isync
 	tlbwe
 	isync
 	msync
-	addi	%r5, %r5, 12
-	addi	%r4, %r4, 1
-	cmpw	%r4, %r6
-	blt	4b
 
 	/* Switch to the final mapping */
 	bl	5f
-	.long __boot_page-.
-5:	mflr	%r5
-	lwz	%r3,0(%r3)
-	add	%r5,%r5,%r3		/* __boot_page in r5 */
-	bl	6f
-6:	mflr	%r3
+5:	mflr	%r3
 	rlwinm	%r3, %r3, 0, 0xfff	/* Offset from boot page start */
 	add	%r3, %r3, %r5		/* Make this virtual address */
 	addi	%r3, %r3, 32
@@ -449,6 +444,7 @@ bp_tlb1_end:
 1:	mflr	%r1
 	lwz	%r2,0(%r1)
 	add	%r1,%r1,%r2
+	stw	%r1, 0(%r1)
 	addi	%r1, %r1, (TMPSTACKSZ - 16)
 
 /*
@@ -479,6 +475,7 @@ bp_tlb1_end:
 6:	b	6b
 #endif /* SMP */
 
+#if defined (BOOKE_E500)
 /*
  * Invalidate all entries in the given TLB.
  *
@@ -508,7 +505,7 @@ tlb1_find_current:
 	isync
 	tlbsx	0, %r3
 	mfspr	%r17, SPR_MAS0
-	rlwinm	%r29, %r17, 16, 20, 31		/* MAS0[ESEL] -> r29 */
+	rlwinm	%r29, %r17, 16, 26, 31		/* MAS0[ESEL] -> r29 */
 
 	/* Make sure we have IPROT set on the entry */
 	mfspr	%r17, SPR_MAS1
@@ -541,14 +538,11 @@ tlb1_inval_entry:
 	blr
 
 /*
- * r3		entry of temp translation
- * r29		entry of current translation
- * r28		returns temp entry passed in r3
- * r4-r5	scratched
+ * r29		current entry number
+ * r28		returned temp entry
+ * r3-r5	scratched
  */
 tlb1_temp_mapping_as1:
-	mr	%r28, %r3
-
 	/* Read our current translation */
 	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
 	rlwimi	%r3, %r29, 16, 10, 15	/* Select our current entry */
@@ -556,8 +550,14 @@ tlb1_temp_mapping_as1:
 	isync
 	tlbre
 
-	/* Prepare and write temp entry */
+	/*
+	 * Prepare and write temp entry
+	 *
+	 * FIXME: this is not robust against overflow, i.e. when the current
+	 * entry is the last in TLB1
+	 */
 	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
+	addi	%r28, %r29, 1		/* Use next entry. */
 	rlwimi	%r3, %r28, 16, 10, 15	/* Select temp entry */
 	mtspr	SPR_MAS0, %r3
 	isync
@@ -640,8 +640,19 @@ zero_mas8:
 	mtspr	SPR_MAS8, %r20
 	isync
 	blr
+#endif
 
 #ifdef SMP
+.globl __boot_tlb1
+	/*
+	 * The __boot_tlb1 table is used to hold BSP TLB1 entries
+	 * marked with the _TLB_ENTRY_SHARED flag during AP bootstrap.
+	 * The BSP fills in the table in tlb1_ap_prep().  Next, each AP
+	 * loads its contents into the TLB1 hardware in pmap_bootstrap_ap().
+	 */
+__boot_tlb1:
+	.space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE
+
 __boot_page_padding:
 	/*
 	 * Boot page needs to be exactly 4K, with the last word of this page
@@ -779,14 +790,8 @@ ENTRY(dataloss_erratum_access)
 	mtspr	SPR_L1CSR1, %r11
 	isync
 
-	mflr	%r9
-	bl	1f
-	.long 2f-.
-1:
-	mflr	%r5
-	lwz	%r8, 0(%r5)
-	mtlr	%r9
-	add	%r8, %r8, %r5
+	lis	%r8, 2f@h
+	ori	%r8, %r8, 2f@l
 	icbtls	0, 0, %r8
 	addi	%r9, %r8, 64
 

Modified: head/sys/powerpc/booke/pmap.c
==============================================================================
--- head/sys/powerpc/booke/pmap.c	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/booke/pmap.c	Tue Apr 19 01:48:18 2016	(r298237)
@@ -110,10 +110,6 @@ extern unsigned char _end[];
 
 extern uint32_t *bootinfo;
 
-#ifdef SMP
-extern uint32_t bp_ntlb1s;
-#endif
-
 vm_paddr_t kernload;
 vm_offset_t kernstart;
 vm_size_t kernsize;
@@ -187,11 +183,6 @@ uint32_t tlb1_entries;
 #define TLB1_ENTRIES (tlb1_entries)
 #define TLB1_MAXENTRIES	64
 
-/* In-ram copy of the TLB1 */
-static tlb_entry_t tlb1[TLB1_MAXENTRIES];
-
-/* Next free entry in the TLB1 */
-static unsigned int tlb1_idx;
 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE;
 
 static tlbtid_t tid_alloc(struct pmap *);
@@ -199,7 +190,8 @@ static void tid_flush(tlbtid_t tid);
 
 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
 
-static void tlb1_write_entry(unsigned int);
+static void tlb1_read_entry(tlb_entry_t *, unsigned int);
+static void tlb1_write_entry(tlb_entry_t *, unsigned int);
 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t);
 
@@ -271,6 +263,7 @@ static vm_offset_t ptbl_buf_pool_vabase;
 static struct ptbl_buf *ptbl_bufs;
 
 #ifdef SMP
+extern tlb_entry_t __boot_tlb1[];
 void pmap_bootstrap_ap(volatile uint32_t *);
 #endif
 
@@ -1369,6 +1362,22 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset
 }
 
 #ifdef SMP
+void
+tlb1_ap_prep(void)
+{
+	tlb_entry_t *e, tmp;
+	unsigned int i;
+
+	/* Prepare TLB1 image for AP processors */
+	e = __boot_tlb1;
+	for (i = 0; i < TLB1_ENTRIES; i++) {
+		tlb1_read_entry(&tmp, i);
+
+		if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
+			memcpy(e++, &tmp, sizeof(tmp));
+	}
+}
+
 void
 pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
 {
@@ -1376,15 +1385,15 @@ pmap_bootstrap_ap(volatile uint32_t *trc
 
 	/*
 	 * Finish TLB1 configuration: the BSP already set up its TLB1 and we
-	 * have the snapshot of its contents in the s/w tlb1[] table, so use
-	 * these values directly to (re)program AP's TLB1 hardware.
-	 */
-	for (i = bp_ntlb1s; i < tlb1_idx; i++) {
-		/* Skip invalid entries */
-		if (!(tlb1[i].mas1 & MAS1_VALID))
-			continue;
-
-		tlb1_write_entry(i);
+	 * have the snapshot of its contents in the s/w __boot_tlb1[] table
+	 * created by tlb1_ap_prep(), so use these values directly to
+	 * (re)program AP's TLB1 hardware.
+	 *
+	 * Start at index 1 because index 0 has the kernel map.
+	 */
+	for (i = 1; i < TLB1_ENTRIES; i++) {
+		if (__boot_tlb1[i].mas1 & MAS1_VALID)
+			tlb1_write_entry(&__boot_tlb1[i], i);
 	}
 
 	set_mas4_defaults();
@@ -1429,14 +1438,16 @@ mmu_booke_extract(mmu_t mmu, pmap_t pmap
 static vm_paddr_t
 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 {
+	tlb_entry_t e;
 	int i;
 
 	/* Check TLB1 mappings */
-	for (i = 0; i < tlb1_idx; i++) {
-		if (!(tlb1[i].mas1 & MAS1_VALID))
+	for (i = 0; i < TLB1_ENTRIES; i++) {
+		tlb1_read_entry(&e, i);
+		if (!(e.mas1 & MAS1_VALID))
 			continue;
-		if (va >= tlb1[i].virt && va < tlb1[i].virt + tlb1[i].size)
-			return (tlb1[i].phys + (va - tlb1[i].virt));
+		if (va >= e.virt && va < e.virt + e.size)
+			return (e.phys + (va - e.virt));
 	}
 
 	return (pte_vatopa(mmu, kernel_pmap, va));
@@ -2652,7 +2663,7 @@ mmu_booke_dev_direct_mapped(mmu_t mmu, v
 	 * This currently does not work for entries that
 	 * overlap TLB1 entries.
 	 */
-	for (i = 0; i < tlb1_idx; i ++) {
+	for (i = 0; i < TLB1_ENTRIES; i ++) {
 		if (tlb1_iomapped(i, pa, size, &va) == 0)
 			return (0);
 	}
@@ -2692,28 +2703,36 @@ mmu_booke_dumpsys_unmap(mmu_t mmu, vm_pa
 	vm_paddr_t ppa;
 	vm_offset_t ofs;
 	vm_size_t gran;
+	tlb_entry_t e;
+	int i;
 
 	/* Minidumps are based on virtual memory addresses. */
 	/* Nothing to do... */
 	if (do_minidump)
 		return;
 
+	for (i = 0; i < TLB1_ENTRIES; i++) {
+		tlb1_read_entry(&e, i);
+		if (!(e.mas1 & MAS1_VALID))
+			break;
+	}
+
 	/* Raw physical memory dumps don't have a virtual address. */
-	tlb1_idx--;
-	tlb1[tlb1_idx].mas1 = 0;
-	tlb1[tlb1_idx].mas2 = 0;
-	tlb1[tlb1_idx].mas3 = 0;
-	tlb1_write_entry(tlb1_idx);
+	i--;
+	e.mas1 = 0;
+	e.mas2 = 0;
+	e.mas3 = 0;
+	tlb1_write_entry(&e, i);
 
 	gran = 256 * 1024 * 1024;
 	ppa = pa & ~(gran - 1);
 	ofs = pa - ppa;
 	if (sz > (gran - ofs)) {
-		tlb1_idx--;
-		tlb1[tlb1_idx].mas1 = 0;
-		tlb1[tlb1_idx].mas2 = 0;
-		tlb1[tlb1_idx].mas3 = 0;
-		tlb1_write_entry(tlb1_idx);
+		i--;
+		e.mas1 = 0;
+		e.mas2 = 0;
+		e.mas3 = 0;
+		tlb1_write_entry(&e, i);
 	}
 }
 
@@ -2796,6 +2815,7 @@ mmu_booke_mapdev(mmu_t mmu, vm_paddr_t p
 static void *
 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
 {
+	tlb_entry_t e;
 	void *res;
 	uintptr_t va, tmpva;
 	vm_size_t sz;
@@ -2807,13 +2827,14 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_padd
 	 * requirement, but now only checks the easy case.
 	 */
 	if (ma == VM_MEMATTR_DEFAULT) {
-		for (i = 0; i < tlb1_idx; i++) {
-			if (!(tlb1[i].mas1 & MAS1_VALID))
+		for (i = 0; i < TLB1_ENTRIES; i++) {
+			tlb1_read_entry(&e, i);
+			if (!(e.mas1 & MAS1_VALID))
 				continue;
-			if (pa >= tlb1[i].phys &&
-			    (pa + size) <= (tlb1[i].phys + tlb1[i].size))
-				return (void *)(tlb1[i].virt +
-				    (vm_offset_t)(pa - tlb1[i].phys));
+			if (pa >= e.phys &&
+			    (pa + size) <= (e.phys + e.size))
+				return (void *)(e.virt +
+				    (vm_offset_t)(pa - e.phys));
 		}
 	}
 
@@ -2846,9 +2867,10 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_padd
 			} while (va % sz != 0);
 		}
 		if (bootverbose)
-			printf("Wiring VA=%x to PA=%jx (size=%x), "
-			    "using TLB1[%d]\n", va, (uintmax_t)pa, sz, tlb1_idx);
-		tlb1_set_entry(va, pa, sz, tlb_calc_wimg(pa, ma));
+			printf("Wiring VA=%x to PA=%jx (size=%x)\n",
+			    va, (uintmax_t)pa, sz);
+		tlb1_set_entry(va, pa, sz,
+		    _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma));
 		size -= sz;
 		pa += sz;
 		va += sz;
@@ -2912,30 +2934,34 @@ mmu_booke_change_attr(mmu_t mmu, vm_offs
 	vm_offset_t va;
 	pte_t *pte;
 	int i, j;
+	tlb_entry_t e;
 
 	/* Check TLB1 mappings */
-	for (i = 0; i < tlb1_idx; i++) {
-		if (!(tlb1[i].mas1 & MAS1_VALID))
+	for (i = 0; i < TLB1_ENTRIES; i++) {
+		tlb1_read_entry(&e, i);
+		if (!(e.mas1 & MAS1_VALID))
 			continue;
-		if (addr >= tlb1[i].virt && addr < tlb1[i].virt + tlb1[i].size)
+		if (addr >= e.virt && addr < e.virt + e.size)
 			break;
 	}
-	if (i < tlb1_idx) {
+	if (i < TLB1_ENTRIES) {
 		/* Only allow full mappings to be modified for now. */
 		/* Validate the range. */
-		for (j = i, va = addr; va < addr + sz; va += tlb1[j].size, j++) {
-			if (va != tlb1[j].virt || (sz - (va - addr) < tlb1[j].size))
+		for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
+			tlb1_read_entry(&e, j);
+			if (va != e.virt || (sz - (va - addr) < e.size))
 				return (EINVAL);
 		}
-		for (va = addr; va < addr + sz; va += tlb1[i].size, i++) {
-			tlb1[i].mas2 &= ~MAS2_WIMGE_MASK;
-			tlb1[i].mas2 |= tlb_calc_wimg(tlb1[i].phys, mode);
+		for (va = addr; va < addr + sz; va += e.size, i++) {
+			tlb1_read_entry(&e, i);
+			e.mas2 &= ~MAS2_WIMGE_MASK;
+			e.mas2 |= tlb_calc_wimg(e.phys, mode);
 
 			/*
 			 * Write it out to the TLB.  Should really re-sync with other
 			 * cores.
 			 */
-			tlb1_write_entry(i);
+			tlb1_write_entry(&e, i);
 		}
 		return (0);
 	}
@@ -3118,12 +3144,48 @@ tlb0_print_tlbentries(void)
  *		windows, other devices mappings.
  */
 
+ /*
+ * Read an entry from the given TLB1 slot.
+ */
+void
+tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
+{
+	uint32_t mas0;
+
+	KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));
+
+	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
+	mtspr(SPR_MAS0, mas0);
+	__asm __volatile("isync; tlbre");
+
+	entry->mas1 = mfspr(SPR_MAS1);
+	entry->mas2 = mfspr(SPR_MAS2);
+	entry->mas3 = mfspr(SPR_MAS3);
+
+	switch ((mfpvr() >> 16) & 0xFFFF) {
+	case FSL_E500v2:
+	case FSL_E500mc:
+	case FSL_E5500:
+		entry->mas7 = mfspr(SPR_MAS7);
+		break;
+	default:
+		entry->mas7 = 0;
+		break;
+	}
+
+	entry->virt = entry->mas2 & MAS2_EPN_MASK;
+	entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
+	    (entry->mas3 & MAS3_RPN);
+	entry->size =
+	    tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
+}
+
 /*
  * Write given entry to TLB1 hardware.
  * Use 32 bit pa, clear 4 high-order bits of RPN (mas7).
  */
 static void
-tlb1_write_entry(unsigned int idx)
+tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
 {
 	uint32_t mas0;
 
@@ -3135,11 +3197,11 @@ tlb1_write_entry(unsigned int idx)
 
 	mtspr(SPR_MAS0, mas0);
 	__asm __volatile("isync");
-	mtspr(SPR_MAS1, tlb1[idx].mas1);
+	mtspr(SPR_MAS1, e->mas1);
 	__asm __volatile("isync");
-	mtspr(SPR_MAS2, tlb1[idx].mas2);
+	mtspr(SPR_MAS2, e->mas2);
 	__asm __volatile("isync");
-	mtspr(SPR_MAS3, tlb1[idx].mas3);
+	mtspr(SPR_MAS3, e->mas3);
 	__asm __volatile("isync");
 	switch ((mfpvr() >> 16) & 0xFFFF) {
 	case FSL_E500mc:
@@ -3148,7 +3210,7 @@ tlb1_write_entry(unsigned int idx)
 		__asm __volatile("isync");
 		/* FALLTHROUGH */
 	case FSL_E500v2:
-		mtspr(SPR_MAS7, tlb1[idx].mas7);
+		mtspr(SPR_MAS7, e->mas7);
 		__asm __volatile("isync");
 		break;
 	default:
@@ -3207,10 +3269,21 @@ int
 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
     uint32_t flags)
 {
+	tlb_entry_t e;
 	uint32_t ts, tid;
 	int tsize, index;
 
-	index = atomic_fetchadd_int(&tlb1_idx, 1);
+	for (index = 0; index < TLB1_ENTRIES; index++) {
+		tlb1_read_entry(&e, index);
+		if ((e.mas1 & MAS1_VALID) == 0)
+			break;
+		/* Check if we're just updating the flags, and update them. */
+		if (e.phys == pa && e.virt == va && e.size == size) {
+			e.mas2 = (va & MAS2_EPN_MASK) | flags;
+			tlb1_write_entry(&e, index);
+			return (0);
+		}
+	}
 	if (index >= TLB1_ENTRIES) {
 		printf("tlb1_set_entry: TLB1 full!\n");
 		return (-1);
@@ -3223,23 +3296,18 @@ tlb1_set_entry(vm_offset_t va, vm_paddr_
 	/* XXX TS is hard coded to 0 for now as we only use single address space */
 	ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
 
-	/*
-	 * Atomicity is preserved by the atomic increment above since nothing
-	 * is ever removed from tlb1.
-	 */
-
-	tlb1[index].phys = pa;
-	tlb1[index].virt = va;
-	tlb1[index].size = size;
-	tlb1[index].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
-	tlb1[index].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
-	tlb1[index].mas2 = (va & MAS2_EPN_MASK) | flags;
+	e.phys = pa;
+	e.virt = va;
+	e.size = size;
+	e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
+	e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
+	e.mas2 = (va & MAS2_EPN_MASK) | flags;
 
 	/* Set supervisor RWX permission bits */
-	tlb1[index].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
-	tlb1[index].mas7 = (pa >> 32) & MAS7_RPN;
+	e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
+	e.mas7 = (pa >> 32) & MAS7_RPN;
 
-	tlb1_write_entry(index);
+	tlb1_write_entry(&e, index);
 
 	/*
 	 * XXX in general TLB1 updates should be propagated between CPUs,
@@ -3302,7 +3370,8 @@ tlb1_mapin_region(vm_offset_t va, vm_pad
 	for (idx = 0; idx < nents; idx++) {
 		pgsz = pgs[idx];
 		debugf("%u: %llx -> %x, size=%x\n", idx, pa, va, pgsz);
-		tlb1_set_entry(va, pa, pgsz, _TLB_ENTRY_MEM);
+		tlb1_set_entry(va, pa, pgsz,
+		    _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM);
 		pa += pgsz;
 		va += pgsz;
 	}
@@ -3326,9 +3395,6 @@ tlb1_init()
 {
 	uint32_t mas0, mas1, mas2, mas3, mas7;
 	uint32_t tsz;
-	int i;
-
-	tlb1_idx = 1;
 
 	tlb1_get_tlbconf();
 
@@ -3341,27 +3407,11 @@ tlb1_init()
 	mas3 = mfspr(SPR_MAS3);
 	mas7 = mfspr(SPR_MAS7);
 
-	tlb1[0].mas1 = mas1;
-	tlb1[0].mas2 = mfspr(SPR_MAS2);
-	tlb1[0].mas3 = mas3;
-	tlb1[0].mas7 = mas7;
-	tlb1[0].virt = mas2 & MAS2_EPN_MASK;
-	tlb1[0].phys =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
+	kernload =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
 	    (mas3 & MAS3_RPN);
 
-	kernload = tlb1[0].phys;
-
 	tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
-	tlb1[0].size = (tsz > 0) ? tsize2size(tsz) : 0;
-	kernsize += tlb1[0].size;
-
-#ifdef SMP
-	bp_ntlb1s = tlb1_idx;
-#endif
-
-	/* Purge the remaining entries */
-	for (i = tlb1_idx; i < TLB1_ENTRIES; i++)
-		tlb1_write_entry(i);
+	kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
 
 	/* Setup TLB miss defaults */
 	set_mas4_defaults();
@@ -3373,15 +3423,17 @@ pmap_early_io_map(vm_paddr_t pa, vm_size
 	vm_paddr_t pa_base;
 	vm_offset_t va, sz;
 	int i;
+	tlb_entry_t e;
 
 	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
 	
-	for (i = 0; i < tlb1_idx; i++) {
-		if (!(tlb1[i].mas1 & MAS1_VALID))
+	for (i = 0; i < TLB1_ENTRIES; i++) {
+		tlb1_read_entry(&e, i);
+		if (!(e.mas1 & MAS1_VALID))
 			continue;
-		if (pa >= tlb1[i].phys && (pa + size) <=
-		    (tlb1[i].phys + tlb1[i].size))
-			return (tlb1[i].virt + (pa - tlb1[i].phys));
+		if (pa >= e.phys && (pa + size) <=
+		    (e.phys + e.size))
+			return (e.virt + (pa - e.phys));
 	}
 
 	pa_base = rounddown(pa, PAGE_SIZE);
@@ -3391,16 +3443,13 @@ pmap_early_io_map(vm_paddr_t pa, vm_size
 
 	do {
 		sz = 1 << (ilog2(size) & ~1);
-		tlb1_set_entry(tlb1_map_base, pa_base, sz, _TLB_ENTRY_IO);
+		tlb1_set_entry(tlb1_map_base, pa_base, sz,
+		    _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
 		size -= sz;
 		pa_base += sz;
 		tlb1_map_base += sz;
 	} while (size > 0);
 
-#ifdef SMP
-	bp_ntlb1s = tlb1_idx;
-#endif
-
 	return (va);
 }
 
@@ -3450,20 +3499,6 @@ tlb1_print_tlbentries(void)
 }
 
 /*
- * Print out contents of the in-ram tlb1 table.
- */
-void
-tlb1_print_entries(void)
-{
-	int i;
-
-	debugf("tlb1[] table entries:\n");
-	for (i = 0; i < TLB1_ENTRIES; i++)
-		tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3,
-		    tlb1[i].mas7);
-}
-
-/*
  * Return 0 if the physical IO range is encompassed by one of the
  * the TLB1 entries, otherwise return related error code.
  */
@@ -3475,39 +3510,41 @@ tlb1_iomapped(int i, vm_paddr_t pa, vm_s
 	vm_paddr_t pa_end;
 	unsigned int entry_tsize;
 	vm_size_t entry_size;
+	tlb_entry_t e;
 
 	*va = (vm_offset_t)NULL;
 
+	tlb1_read_entry(&e, i);
 	/* Skip invalid entries */
-	if (!(tlb1[i].mas1 & MAS1_VALID))
+	if (!(e.mas1 & MAS1_VALID))
 		return (EINVAL);
 
 	/*
 	 * The entry must be cache-inhibited, guarded, and r/w
 	 * so it can function as an i/o page
 	 */
-	prot = tlb1[i].mas2 & (MAS2_I | MAS2_G);
+	prot = e.mas2 & (MAS2_I | MAS2_G);
 	if (prot != (MAS2_I | MAS2_G))
 		return (EPERM);
 
-	prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW);
+	prot = e.mas3 & (MAS3_SR | MAS3_SW);
 	if (prot != (MAS3_SR | MAS3_SW))
 		return (EPERM);
 
 	/* The address should be within the entry range. */
-	entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+	entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
 
 	entry_size = tsize2size(entry_tsize);
-	pa_start = (((vm_paddr_t)tlb1[i].mas7 & MAS7_RPN) << 32) | 
-	    (tlb1[i].mas3 & MAS3_RPN);
+	pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | 
+	    (e.mas3 & MAS3_RPN);
 	pa_end = pa_start + entry_size;
 
 	if ((pa < pa_start) || ((pa + size) > pa_end))
 		return (ERANGE);
 
 	/* Return virtual address of this mapping. */
-	*va = (tlb1[i].mas2 & MAS2_EPN_MASK) + (pa - pa_start);
+	*va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
 	return (0);
 }
 

Modified: head/sys/powerpc/include/tlb.h
==============================================================================
--- head/sys/powerpc/include/tlb.h	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/include/tlb.h	Tue Apr 19 01:48:18 2016	(r298237)
@@ -74,7 +74,7 @@
 #define	MAS2_M			0x00000004
 #define	MAS2_G			0x00000002
 #define	MAS2_E			0x00000001
-#define	MAS2_WIMGE_MASK		0x0000001F
+#define	MAS2_WIMGE_MASK		0x0000007F
 
 #define	MAS3_RPN		0xFFFFF000
 #define	MAS3_RPN_SHIFT		12
@@ -120,9 +120,17 @@
  */
 #define KERNEL_REGION_MAX_TLB_ENTRIES   4
 
+/*
+ * Use MAS2_X0 to mark entries which will be copied
+ * to AP CPUs during SMP bootstrap.  As a result, entries
+ * marked with _TLB_ENTRY_SHARED will be shared by all CPUs.
+ */
+#define _TLB_ENTRY_SHARED	(MAS2_X0)	/* XXX under SMP? */
 #define _TLB_ENTRY_IO	(MAS2_I | MAS2_G)
 #define _TLB_ENTRY_MEM	(MAS2_M)
 
+#define TLB1_MAX_ENTRIES	64
+
 #if !defined(LOCORE)
 typedef struct tlb_entry {
 	vm_paddr_t phys;
@@ -211,6 +219,7 @@ struct pmap;
 
 void tlb_lock(uint32_t *);
 void tlb_unlock(uint32_t *);
+void tlb1_ap_prep(void);
 int  tlb1_set_entry(vm_offset_t, vm_paddr_t, vm_size_t, uint32_t);
 
 #endif /* !LOCORE */

Modified: head/sys/powerpc/mpc85xx/platform_mpc85xx.c
==============================================================================
--- head/sys/powerpc/mpc85xx/platform_mpc85xx.c	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/mpc85xx/platform_mpc85xx.c	Tue Apr 19 01:48:18 2016	(r298237)
@@ -62,9 +62,7 @@ __FBSDID("$FreeBSD$");
 extern void *ap_pcpu;
 extern vm_paddr_t kernload;		/* Kernel physical load address */
 extern uint8_t __boot_page[];		/* Boot page body */
-extern uint32_t bp_ntlb1s;
-extern uint32_t bp_tlb1[];
-extern uint32_t bp_tlb1_end[];
+extern uint32_t bp_kernload;
 #endif
 
 extern uint32_t *bootinfo;
@@ -321,10 +319,9 @@ static int
 mpc85xx_smp_start_cpu(platform_t plat, struct pcpu *pc)
 {
 #ifdef SMP
-	uint32_t *tlb1;
 	vm_paddr_t bptr;
 	uint32_t reg;
-	int i, timeout;
+	int timeout;
 	uintptr_t brr;
 	int cpuid;
 
@@ -344,6 +341,7 @@ mpc85xx_smp_start_cpu(platform_t plat, s
 	brr = OCP85XX_EEBPCR;
 	cpuid = pc->pc_cpuid + 24;
 #endif
+	bp_kernload = kernload;
 	reg = ccsr_read4(brr);
 	if ((reg & (1 << cpuid)) != 0) {
 		printf("SMP: CPU %d already out of hold-off state!\n",
@@ -354,20 +352,6 @@ mpc85xx_smp_start_cpu(platform_t plat, s
 	ap_pcpu = pc;
 	__asm __volatile("msync; isync");
 
-	i = 0;
-	tlb1 = bp_tlb1;
-	while (i < bp_ntlb1s && tlb1 < bp_tlb1_end) {
-		mtspr(SPR_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(i));
-		__asm __volatile("isync; tlbre");
-		tlb1[0] = mfspr(SPR_MAS1);
-		tlb1[1] = mfspr(SPR_MAS2);
-		tlb1[2] = mfspr(SPR_MAS3);
-		i++;
-		tlb1 += 3;
-	}
-	if (i < bp_ntlb1s)
-		bp_ntlb1s = i;
-
 	/* Flush caches to have our changes hit DRAM. */
 	cpu_flush_dcache(__boot_page, 4096);
 

Modified: head/sys/powerpc/powerpc/genassym.c
==============================================================================
--- head/sys/powerpc/powerpc/genassym.c	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/powerpc/genassym.c	Tue Apr 19 01:48:18 2016	(r298237)
@@ -125,7 +125,7 @@ ASSYM(PM_PDIR, offsetof(struct pmap, pm_
 ASSYM(PTE_RPN, 0);
 ASSYM(PTE_FLAGS, sizeof(uint32_t));
 #if defined(BOOKE_E500)
-ASSYM(TLB0_ENTRY_SIZE, sizeof(struct tlb_entry));
+ASSYM(TLB_ENTRY_SIZE, sizeof(struct tlb_entry));
 #endif
 #endif
 

Modified: head/sys/powerpc/powerpc/mp_machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/mp_machdep.c	Tue Apr 19 01:25:35 2016	(r298236)
+++ head/sys/powerpc/powerpc/mp_machdep.c	Tue Apr 19 01:48:18 2016	(r298237)
@@ -212,6 +212,9 @@ cpu_mp_unleash(void *dummy)
 
 	cpus = 0;
 	smp_cpus = 0;
+#ifdef BOOKE
+	tlb1_ap_prep();
+#endif
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		cpus++;
 		if (!pc->pc_bsp) {

