svn commit: r279112 - user/nwhitehorn/ppc64-pmap-rework/aim

Nathan Whitehorn nwhitehorn at FreeBSD.org
Sat Feb 21 21:59:05 UTC 2015


Author: nwhitehorn
Date: Sat Feb 21 21:59:04 2015
New Revision: 279112
URL: https://svnweb.freebsd.org/changeset/base/279112

Log:
  Simplicity and correctness improvements to native page table access:
  fold PTE insertion and eviction into a common locked helper
  (moea64_insert_to_pteg_native()), clear the reference bit with a
  one-byte store so the change bit is never touched, and fall back to
  unset-and-reinsert in pte_clear() and pte_replace() whenever more
  than the reference or software bits change.

Modified:
  user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c

Modified: user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c	Sat Feb 21 21:28:45 2015	(r279111)
+++ user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c	Sat Feb 21 21:59:04 2015	(r279112)
@@ -196,10 +196,10 @@ static int64_t	moea64_pte_unset_native(m
 /*
  * Utility routines.
  */
-static void		moea64_bootstrap_native(mmu_t mmup, 
-			    vm_offset_t kernelstart, vm_offset_t kernelend);
-static void		moea64_cpu_bootstrap_native(mmu_t, int ap);
-static void		tlbia(void);
+static void	moea64_bootstrap_native(mmu_t mmup, 
+		    vm_offset_t kernelstart, vm_offset_t kernelend);
+static void	moea64_cpu_bootstrap_native(mmu_t, int ap);
+static void	tlbia(void);
 
 static mmu_method_t moea64_native_methods[] = {
 	/* Internal interfaces */
@@ -256,8 +256,6 @@ moea64_pte_clear_native(mmu_t mmu, struc
 
 	moea64_pte_from_pvo(pvo, &properpt);
 
-	/* See "Resetting the Reference Bit" in arch manual */
-
 	rw_rlock(&moea64_eviction_lock);
 	if ((pt->pte_hi & LPTE_AVPN_MASK) !=
 	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
@@ -266,52 +264,29 @@ moea64_pte_clear_native(mmu_t mmu, struc
 		return (-1);
 	}
 
-	/*
-	 * As shown in Section 7.6.3.2.3
-	 */
-	PTESYNC();
-	ptelo = atomic_swap_32((volatile int32_t *)(&pt->pte_lo) + 1,
-	    (pt->pte_lo & ~ptebit) & 0xffffffff);
-	ptelo |= pt->pte_lo;
-	rw_runlock(&moea64_eviction_lock);
+	if (ptebit == LPTE_REF) {
+		/* See "Resetting the Reference Bit" in arch manual */
+		PTESYNC();
+		/* Non-atomic 2-step here is safe: precision is not guaranteed */
+		ptelo = pt->pte_lo;
+
+		/* One-byte store to avoid touching the C bit */
+		((volatile uint8_t *)(&pt->pte_lo))[6] =
+		    ((uint8_t *)(&properpt.pte_lo))[6];
+		rw_runlock(&moea64_eviction_lock);
 
-	critical_enter();
-	TLBIE(pvo->pvo_vpn);
-	critical_exit();
+		critical_enter();
+		TLBIE(pvo->pvo_vpn);
+		critical_exit();
+	} else {
+		rw_runlock(&moea64_eviction_lock);
+		ptelo = moea64_pte_unset_native(mmu, pvo);
+		moea64_pte_insert_native(mmu, pvo);
+	}
 
 	return (ptelo & (LPTE_REF | LPTE_CHG));
 }
 
-static int
-moea64_pte_set_native(struct lpte *pvo_pt, uintptr_t offset)
-{
-	volatile struct lpte *pt = moea64_pteg_table + offset;
-	int result;
-
-	result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1,
-	    pt->pte_hi & ~(LPTE_LOCKED | LPTE_VALID), LPTE_LOCKED);
-	if (!result)
-		return (-1);
-
-	/*
-	 * Update the PTE as defined in section 7.6.3.1.
-	 */
-	pt->pte_lo = pvo_pt->pte_lo;
-	EIEIO();
-#ifdef __powerpc64__
-	pt->pte_hi = pvo_pt->pte_hi;
-#else
-	*(volatile int32_t *)(&pt->pte_hi) = pvo_pt->pte_hi >> 32;
-	EIEIO();
-	*((volatile int32_t *)(&pt->pte_hi) + 1) = pvo_pt->pte_hi & 0xffffffff;
-#endif
-	PTESYNC();
-
-	/* Keep statistics */
-	moea64_pte_valid++;
-	return (0);
-}
-
 static int64_t
 moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo)
 {
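
The one-byte store in moea64_pte_clear_native() above relies on the
big-endian layout of the 64-bit pte_lo doubleword: LPTE_REF
(0x0000000000000100) sits in byte 6 while LPTE_CHG (0x0000000000000080)
sits in byte 7, so storing byte 6 alone resets R without ever racing
against a hardware update of C. A standalone illustration, assuming a
big-endian host as on the PowerPC machines this code targets (not
kernel code; prints "REF=0 CHG=1"):

#include <stdint.h>
#include <stdio.h>

#define	LPTE_REF	0x0000000000000100ULL	/* bit lives in byte 6 (BE) */
#define	LPTE_CHG	0x0000000000000080ULL	/* bit lives in byte 7 (BE) */

int
main(void)
{
	volatile uint64_t pte_lo = LPTE_REF | LPTE_CHG;
	uint64_t want = pte_lo & ~LPTE_REF;

	/* Store only byte 6: clears R, cannot touch C in byte 7. */
	((volatile uint8_t *)&pte_lo)[6] = ((uint8_t *)&want)[6];

	printf("REF=%d CHG=%d\n",
	    (pte_lo & LPTE_REF) != 0, (pte_lo & LPTE_CHG) != 0);
	return (0);
}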
@@ -332,7 +307,8 @@ moea64_pte_unset_native(mmu_t mmu, struc
 	}
 
 	/*
-	 * Invalidate the pte.
+	 * Invalidate the pte, briefly locking it to collect RC bits. No
+	 * atomics needed since this is protected against eviction by the lock.
 	 */
 	isync();
 	critical_enter();
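
The rest of this sequence falls outside the hunk, but the same pattern
appears in the code removed from moea64_pte_insert_native() at the
bottom of this diff. A sketch of its shape, with names following the
surrounding function (plain stores suffice because the eviction lock
excludes any concurrent mover):

	pt->pte_hi &= ~LPTE_VALID;	/* invalidate; no atomics needed */
	PTESYNC();			/* order the store before tlbie */
	TLBIE(pvo->pvo_vpn);		/* flush the stale translation */
	critical_exit();
	ptelo = pt->pte_lo;		/* RC bits are stable once invalid */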
@@ -355,41 +331,30 @@ moea64_pte_replace_native(mmu_t mmu, str
 {
 	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
 	struct lpte properpt;
-	int result;
-	volatile int32_t lowbits;
+	int64_t ptelo;
 
 	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
 
-	moea64_pte_from_pvo(pvo, &properpt);
-
-	result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1,
-	    properpt.pte_hi & 0xffffffff, (properpt.pte_hi | LPTE_LOCKED)
-	     & 0xffffffff);
-
-	if (result && flags == 0) {
-		/* Just some software bits changing. The order in which this is
-		 * done is important on 32-bit systems. */
-		*(volatile int32_t *)(&pt->pte_hi) = properpt.pte_hi >> 32;
-		*(volatile int32_t *)(&pt->pte_lo) = properpt.pte_lo >> 32;
-		pt->pte_lo = properpt.pte_lo & 0xffffffff;
-		lowbits = atomic_swap_32((volatile int32_t *)(&pt->pte_lo) + 1,
-		    *((int32_t *)(&properpt.pte_lo) + 1));
-		EIEIO();
-		*((volatile int32_t *)(&pt->pte_hi) + 1) =
-		    properpt.pte_hi & 0xffffffff;
-		return (lowbits & (LPTE_REF | LPTE_CHG));
+	if (flags == 0) {
+		/* Just some software bits changing. */
+		moea64_pte_from_pvo(pvo, &properpt);
+
+		rw_rlock(&moea64_eviction_lock);
+		if ((pt->pte_hi & LPTE_AVPN_MASK) !=
+		    (properpt.pte_hi & LPTE_AVPN_MASK)) {
+			rw_runlock(&moea64_eviction_lock);
+			return (-1);
+		}
+		pt->pte_hi = properpt.pte_hi;
+		ptelo = pt->pte_lo;
+		rw_runlock(&moea64_eviction_lock);
+	} else {
+		/* Otherwise, need deletion and reinsertion */
+		ptelo = moea64_pte_unset_native(mmu, pvo);
+		moea64_pte_insert_native(mmu, pvo);
 	}
 
-	/*
-	 * Anything else requires invalidation and reinsertion, so just do that
-	 */
-
-	lowbits = -1;
-	if (result)
-		lowbits = moea64_pte_unset_native(mmu, pvo);
-	moea64_pte_insert_native(mmu, pvo);
-
-	return (lowbits);
+	return (ptelo);
 }
 
 static void
@@ -524,38 +489,106 @@ tlbia(void)
 	TLBSYNC();
 }
 
-static __inline int
-moea64_pte_spillable_ident(uintptr_t slotbase)
+static int
+atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi)
+{
+	int	ret;
+	uint32_t oldhihalf;
+
+	/*
+	 * Note: in principle, if just the locked bit were set here, we
+	 * could avoid needing the eviction lock. However, eviction occurs
+	 * so rarely that it isn't worth bothering about in practice.
+	 */
+
+	__asm __volatile (
+		"1:\tlwarx %1, 0, %3\n\t"	/* load old value */
+		"and. %0,%1,%4\n\t"		/* check if any bits set */
+		"bne 2f\n\t"			/* exit if any set */
+		"stwcx. %5, 0, %3\n\t"      	/* attempt to store */
+		"bne- 1b\n\t"			/* spin if failed */
+		"li %0, 1\n\t"			/* success - retval = 1 */
+		"b 3f\n\t"			/* we've succeeded */
+		"2:\n\t"
+		"stwcx. %1, 0, %3\n\t"       	/* clear reservation (74xx) */
+		"li %0, 0\n\t"			/* failure - retval = 0 */
+		"3:\n\t"
+		: "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi)
+		: "r" ((volatile char *)&pte->pte_hi + 4), "r" (bitmask),
+		  "r" (LPTE_LOCKED), "m" (pte->pte_hi)
+		: "cr0", "cr1", "cr2", "memory");
+
+	*oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf;
+
+	return (ret);
+}
+
+static uintptr_t
+moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase,
+    uint64_t mask)
 {
-	volatile struct	lpte *pt;
-	int	i, j;
+	volatile struct lpte *pt;
+	uint64_t oldptehi, va;
 	uintptr_t k;
+	int i, j;
 
 	/* Start at a random slot */
 	i = mftb() % 8;
-	k = -1;
 	for (j = 0; j < 8; j++) {
-		pt = &moea64_pteg_table[slotbase + (i + j) % 8];
-		if (pt->pte_hi & LPTE_WIRED)
-			continue;
-
-		/* This is a candidate, so remember it */
-		k = (i + j) % 8;
-
-		/* Try to get a page that has not been used lately */
-		if (!(pt->pte_lo & LPTE_REF) || !(pt->pte_hi & LPTE_VALID))
-			return (k);
+		k = slotbase + (i + j) % 8;
+		pt = &moea64_pteg_table[k];
+		/* Invalidate and seize lock only if no bits in mask set */
+		if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */
+			break;
 	}
-	
+
+	if (j == 8)
+		return (-1);
+
+	if (oldptehi & LPTE_VALID) {
+		KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry"));
+		/*
+		 * Need to invalidate old entry completely: see
+		 * "Modifying a Page Table Entry". Need to reconstruct
+		 * the virtual address for the outgoing entry to do that.
+		 */
+		if (oldptehi & LPTE_BIG)
+			va = oldptehi >> moea64_large_page_shift;
+		else
+			va = oldptehi >> ADDR_PIDX_SHFT;
+		if (oldptehi & LPTE_HID)
+			va = (((k >> 3) ^ moea64_pteg_mask) ^ va) &
+			    VSID_HASH_MASK;
+		else
+			va = ((k >> 3) ^ va) & VSID_HASH_MASK;
+		va |= (oldptehi & LPTE_AVPN_MASK) <<
+		    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
+		PTESYNC();
+		TLBIE(va);
+		moea64_pte_valid--;
+		moea64_pte_overflow++;
+	}
+
+	/*
+	 * Update the PTE as per "Adding a Page Table Entry". Lock is released
+	 * by setting the high doubleword.
+	 */
+	pt->pte_lo = pvo_pt->pte_lo;
+	EIEIO();
+	pt->pte_hi = pvo_pt->pte_hi;
+	PTESYNC();
+
+	/* Keep statistics */
+	moea64_pte_valid++;
+
 	return (k);
 }
 
 static int
 moea64_pte_insert_native(mmu_t mmu, struct pvo_entry *pvo)
 {
-	volatile struct	lpte *pt;
-	struct	lpte insertpt;
-	int	i;
+	struct lpte insertpt;
+	uintptr_t slot;
 
 	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
 
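
The lwarx/stwcx. pair in atomic_pte_lock() above is a load-reserve/
store-conditional try-lock on the low 32-bit half of pte_hi: it stores
LPTE_LOCKED (which, by replacing the whole word, also clears LPTE_VALID
and so invalidates the slot) only if none of the caller's mask bits are
set, and otherwise stores the old value back purely to cancel the
reservation, as some 74xx-class cores want. A rough C11 restatement of
the semantics (a sketch, not a drop-in replacement for the assembly):

#include <stdatomic.h>
#include <stdint.h>

/* Returns 1 and installs 'locked' iff (*hi & mask) was zero. */
static int
pte_trylock32(_Atomic uint32_t *hi, uint32_t mask, uint32_t locked,
    uint32_t *oldp)
{
	uint32_t old = atomic_load(hi);

	while ((old & mask) == 0) {
		if (atomic_compare_exchange_weak(hi, &old, locked)) {
			*oldp = old;
			return (1);	/* slot seized and invalidated */
		}
		/* 'old' was refreshed by the failed CAS; retry. */
	}
	*oldp = old;
	return (0);			/* some mask bit set: move on */
}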
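
Because the PTEG index was computed as hash = (vsid ^ pgidx) &
moea64_pteg_mask (complemented for the secondary hash), and XOR is its
own inverse, the page-index bits consumed by the hash fall back out as
(slot >> 3) ^ vsid. That is what the reconstruction in
moea64_insert_to_pteg_native() does; restated as a helper using the
kernel's constants, for the 4K-page case only (the commit additionally
folds in moea64_large_page_shift for LPTE_BIG mappings):

/* Sketch: rebuild the TLBIE operand for an evicted 4K entry. */
static uint64_t
reconstruct_va(uintptr_t slot, uint64_t oldptehi)
{
	uint64_t va = oldptehi >> ADDR_PIDX_SHFT;

	if (oldptehi & LPTE_HID)	/* secondary: undo the complement */
		va = (((slot >> 3) ^ moea64_pteg_mask) ^ va) & VSID_HASH_MASK;
	else
		va = ((slot >> 3) ^ va) & VSID_HASH_MASK;
	/* High-order VA bits come straight from the AVPN field. */
	va |= (oldptehi & LPTE_AVPN_MASK) <<
	    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
	return (va);
}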
@@ -568,17 +601,13 @@ moea64_pte_insert_native(mmu_t mmu, stru
 	/*
 	 * First try primary hash.
 	 */
-	pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
-	for (pt = &moea64_pteg_table[pvo->pvo_pte.slot], i = 0; i < 8;
-	    i++, pt++) {
-		if (!(pt->pte_hi & LPTE_VALID)) {
-			if (moea64_pte_set_native(&insertpt,
-			    pvo->pvo_pte.slot + i) == 0) {
-				rw_runlock(&moea64_eviction_lock);
-				pvo->pvo_pte.slot += i;
-				return (0);
-			}
-		}
+	pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
+	slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
+	    LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
+	if (slot != -1) {
+		rw_runlock(&moea64_eviction_lock);
+		pvo->pvo_pte.slot = slot;
+		return (0);
 	}
 
 	/*
@@ -587,17 +616,12 @@ moea64_pte_insert_native(mmu_t mmu, stru
 	pvo->pvo_vaddr ^= PVO_HID;
 	insertpt.pte_hi ^= LPTE_HID;
 	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
-
-	for (pt = &moea64_pteg_table[pvo->pvo_pte.slot], i = 0; i < 8;
-	    i++, pt++) {
-		if (!(pt->pte_hi & LPTE_VALID)) {
-			if (moea64_pte_set_native(&insertpt,
-			    pvo->pvo_pte.slot + i) == 0) {
-				rw_runlock(&moea64_eviction_lock);
-				pvo->pvo_pte.slot += i;
-				return (0);
-			}
-		}
+	slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
+	    LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
+	if (slot != -1) {
+		rw_runlock(&moea64_eviction_lock);
+		pvo->pvo_pte.slot = slot;
+		return (0);
 	}
 
 	/*
@@ -610,51 +634,29 @@ moea64_pte_insert_native(mmu_t mmu, stru
 		rw_wlock(&moea64_eviction_lock);
 	}
 
-	i = moea64_pte_spillable_ident(pvo->pvo_pte.slot);
-	if (i < 0) {
-		/* Try other hash table? */
-		pvo->pvo_vaddr ^= PVO_HID;
-		insertpt.pte_hi ^= LPTE_HID;
-		pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
-		i = moea64_pte_spillable_ident(pvo->pvo_pte.slot);
+	slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
+	    LPTE_WIRED | LPTE_LOCKED);
+	if (slot != -1) {
+		rw_wunlock(&moea64_eviction_lock);
+		pvo->pvo_pte.slot = slot;
+		return (0);
 	}
 
-	if (i < 0) {
-		/* No freeable slots in either PTEG? We're hosed. */
+	/* Try other hash table. Now we're getting desperate... */
+	pvo->pvo_vaddr ^= PVO_HID;
+	insertpt.pte_hi ^= LPTE_HID;
+	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
+	slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
+	    LPTE_WIRED | LPTE_LOCKED);
+	if (slot != -1) {
 		rw_wunlock(&moea64_eviction_lock);
-		panic("moea64_pte_insert: overflow");
-		return (-1);
+		pvo->pvo_pte.slot = slot;
+		return (0);
 	}
 
-	pvo->pvo_pte.slot += i;
-
-	/*
-	 * Collect ref/changed bits from the victim and replace it.
-	 */
-	pt = &moea64_pteg_table[pvo->pvo_pte.slot];
-
-	/*
-	 * Invalidate the pte.
-	 */
-	isync();
-	critical_enter();
-	pt->pte_hi &= ~LPTE_VALID;
-	PTESYNC();
-	TLBIE((pt->pte_hi & LPTE_AVPN_MASK) <<
-	    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT));
-	critical_exit();
-	
-	/* New translation */
-	pt->pte_lo = insertpt.pte_lo;
-	EIEIO();
-	pt->pte_hi = insertpt.pte_hi;
-	PTESYNC();
-
+	/* No freeable slots in either PTEG? We're hosed. */
 	rw_wunlock(&moea64_eviction_lock);
-
-	/* Keep statistics */
-	moea64_pte_valid++;
-
-	return (0);
+	panic("moea64_pte_insert: overflow");
+	return (-1);
 }
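
Taken together, moea64_pte_insert_native() now makes up to four passes
over the two candidate PTEGs, widening the set of acceptable victims as
it goes. In outline (try_pteg() is a hypothetical stand-in for
moea64_insert_to_pteg_native() plus the pvo slot bookkeeping):

	/* Pass 1 (read-locked): only genuinely free slots will do. */
	if (try_pteg(primary,   LPTE_VALID | LPTE_WIRED | LPTE_LOCKED) ||
	    try_pteg(secondary, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED))
		return (0);

	/*
	 * Pass 2 (write-locked): dropping LPTE_VALID from the mask lets
	 * valid but unwired entries be evicted to make room.
	 */
	if (try_pteg(primary,   LPTE_WIRED | LPTE_LOCKED) ||
	    try_pteg(secondary, LPTE_WIRED | LPTE_LOCKED))
		return (0);

	/* No freeable slots in either PTEG: unrecoverable. */
	panic("moea64_pte_insert: overflow");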
 

