svn commit: r279040 - in user/nwhitehorn/ppc64-pmap-rework: aim conf ps3 pseries

Nathan Whitehorn nwhitehorn at FreeBSD.org
Fri Feb 20 05:17:26 UTC 2015


Author: nwhitehorn
Date: Fri Feb 20 05:17:24 2015
New Revision: 279040
URL: https://svnweb.freebsd.org/changeset/base/279040

Log:
  Rearrange the handling of reference and changed bits for pages leaving the
  page table, whether by unset or by eviction, so that the page table methods
  do not need to keep any state at all. This is helpful for some hypervisors
  that do not allow reading the mapped real address of a given page table
  entry when removing it.
  
  This enables porting the PS3 MMU code to the new framework (compile-tested
  only).
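  
  As a reference for porting other backends: callers now consume the
  ref/changed bits returned by the PTE methods directly, and are pessimistic
  when a method reports that the entry had already been evicted. A condensed
  sketch of that pattern (distilled from the moea64_unwire() and
  moea64_pvo_remove_from_pmap() hunks below; the helper name is illustrative
  only and not part of this change):
  
  	static void
  	sync_refchg_on_unset(mmu_t mmu, struct pvo_entry *pvo, vm_page_t m)
  	{
  		int64_t refchg;
  
  		/* Returns the R/C bits, or -1 if the PTE had been evicted. */
  		refchg = MOEA64_PTE_UNSET(mmu, pvo);
  		if (refchg < 0) {
  			/* Pessimistically treat evicted writable pages as dirty. */
  			refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
  			    LPTE_CHG : 0;
  		}
  
  		if ((pvo->pvo_vaddr & PVO_MANAGED) &&
  		    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
  			/* Fold in bits recorded earlier by pte_synch()/pte_clear(). */
  			refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
  			if (refchg & LPTE_CHG)
  				vm_page_dirty(m);
  			if (refchg & LPTE_REF)
  				vm_page_aflag_set(m, PGA_REFERENCED);
  		}
  	}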

Modified:
  user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.c
  user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.h
  user/nwhitehorn/ppc64-pmap-rework/aim/moea64_if.m
  user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c
  user/nwhitehorn/ppc64-pmap-rework/conf/GENERIC64
  user/nwhitehorn/ppc64-pmap-rework/ps3/mmu_ps3.c
  user/nwhitehorn/ppc64-pmap-rework/pseries/mmu_phyp.c

Modified: user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.c
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.c	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.c	Fri Feb 20 05:17:24 2015	(r279040)
@@ -420,18 +420,6 @@ moea64_pte_from_pvo(const struct pvo_ent
 		lpte->pte_lo |= LPTE_NOEXEC;
 }
 
-void
-moea64_sync_refchg(uint64_t lpte_lo)
-{
-	vm_page_t pg;
-
-	pg = PHYS_TO_VM_PAGE(lpte_lo & LPTE_RPGN);
-	if (pg == NULL || (pg->oflags & VPO_UNMANAGED))
-		return;
-
-	atomic_set_32(&pg->md.mdpg_attrs, lpte_lo & (LPTE_REF | LPTE_CHG));
-}
-
 static __inline uint64_t
 moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma)
 {
@@ -963,7 +951,7 @@ moea64_late_bootstrap(mmu_t mmup, vm_off
 
 	/*
 	 * Allocate some things for page zeroing. We put this directly
-	 * in the page table and use MOEA64_PTE_REPLACE to avoid
+	 * in the page table and use MOEA64_PTE_REPLACE to avoid any
 	 * of the PVO book-keeping or other parts of the VM system
 	 * from even knowing that this hack exists.
 	 */
@@ -1020,6 +1008,8 @@ void
 moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
 {
 	struct	pvo_entry key, *pvo;
+	vm_page_t m;
+	int64_t	refchg;
 
 	key.pvo_vaddr = sva;
 	PMAP_LOCK(pm);
@@ -1030,7 +1020,19 @@ moea64_unwire(mmu_t mmu, pmap_t pm, vm_o
 			panic("moea64_unwire: pvo %p is missing PVO_WIRED",
 			    pvo);
 		pvo->pvo_vaddr &= ~PVO_WIRED;
-		MOEA64_PTE_REPLACE(mmu, pvo, 0 /* Doesn't need invalidation */);
+		refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */);
+		if ((pvo->pvo_vaddr & PVO_MANAGED) &&
+		    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
+			if (refchg < 0)
+				refchg = LPTE_CHG;
+			m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
+
+			refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
+			if (refchg & LPTE_CHG)
+				vm_page_dirty(m);
+			if (refchg & LPTE_REF)
+				vm_page_aflag_set(m, PGA_REFERENCED);
+		}
 		pm->pm_stats.wired_count--;
 	}
 	PMAP_UNLOCK(pm);
@@ -1567,6 +1569,7 @@ void
 moea64_remove_write(mmu_t mmu, vm_page_t m)
 {
 	struct	pvo_entry *pvo;
+	int64_t	refchg, ret;
 	pmap_t	pmap;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -1582,19 +1585,24 @@ moea64_remove_write(mmu_t mmu, vm_page_t
 		return;
 	powerpc_sync();
 	PV_PAGE_LOCK(m);
+	refchg = 0;
 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
 		pmap = pvo->pvo_pmap;
 		PMAP_LOCK(pmap);
 		if (!(pvo->pvo_vaddr & PVO_DEAD) &&
 		    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
 			pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
-			MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE);
+			ret = MOEA64_PTE_REPLACE(mmu, pvo,
+			    MOEA64_PTE_PROT_UPDATE);
+			if (ret < 0)
+				ret = LPTE_CHG;
+			refchg |= ret;
 			if (pvo->pvo_pmap == kernel_pmap)
 				isync();
 		}
 		PMAP_UNLOCK(pmap);
 	}
-	if (atomic_readandclear_32(&m->md.mdpg_attrs) & LPTE_CHG)
+	if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG)
 		vm_page_dirty(m);
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	PV_PAGE_UNLOCK(m);
@@ -1628,6 +1636,7 @@ void
 moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma)
 {
 	struct	pvo_entry *pvo;
+	int64_t	refchg;
 	pmap_t	pmap;
 	uint64_t lo;
 
@@ -1645,7 +1654,20 @@ moea64_page_set_memattr(mmu_t mmu, vm_pa
 		if (!(pvo->pvo_vaddr & PVO_DEAD)) {
 			pvo->pvo_pte.pa &= ~LPTE_WIMG;
 			pvo->pvo_pte.pa |= lo;
-			MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE);
+			refchg = MOEA64_PTE_REPLACE(mmu, pvo,
+			    MOEA64_PTE_INVALIDATE);
+			if (refchg < 0)
+				refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
+				    LPTE_CHG : 0;
+			if ((pvo->pvo_vaddr & PVO_MANAGED) &&
+			    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
+				refchg |=
+				    atomic_readandclear_32(&m->md.mdpg_attrs);
+				if (refchg & LPTE_CHG)
+					vm_page_dirty(m);
+				if (refchg & LPTE_REF)
+					vm_page_aflag_set(m, PGA_REFERENCED);
+			}
 			if (pvo->pvo_pmap == kernel_pmap)
 				isync();
 		}
@@ -1935,7 +1957,9 @@ moea64_pvo_protect(mmu_t mmu,  pmap_t pm
 	/*
 	 * If the PVO is in the page table, update mapping
 	 */
-	MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE);
+	refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE);
+	if (refchg < 0)
+		refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
 
 	if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) &&
 	    (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
@@ -1950,8 +1974,8 @@ moea64_pvo_protect(mmu_t mmu,  pmap_t pm
 	 * removed write access.
 	 */
 	if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
-	    (oldprot & VM_PROT_WRITE) && !(prot & VM_PROT_WRITE)) {
-		refchg = atomic_readandclear_32(&pg->md.mdpg_attrs);
+	    (oldprot & VM_PROT_WRITE)) {
+		refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
 		if (refchg & LPTE_CHG)
 			vm_page_dirty(pg);
 		if (refchg & LPTE_REF)
@@ -2274,7 +2298,17 @@ moea64_pvo_remove_from_pmap(mmu_t mmu, s
 	/*
 	 * If there is an active pte entry, we need to deactivate it
 	 */
-	MOEA64_PTE_UNSET(mmu, pvo);
+	refchg = MOEA64_PTE_UNSET(mmu, pvo);
+	if (refchg < 0) {
+		/*
+		 * If it was evicted from the page table, be pessimistic and
+		 * dirty the page.
+		 */
+		if (pvo->pvo_pte.prot & VM_PROT_WRITE)
+			refchg = LPTE_CHG;
+		else
+			refchg = 0;
+	}
 
 	/*
 	 * Update our statistics.
@@ -2298,7 +2332,7 @@ moea64_pvo_remove_from_pmap(mmu_t mmu, s
 	    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
 		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
 		if (pg != NULL) {
-			refchg = atomic_readandclear_32(&pg->md.mdpg_attrs);
+			refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
 			if (refchg & LPTE_CHG)
 				vm_page_dirty(pg);
 			if (refchg & LPTE_REF)

Modified: user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.h
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.h	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/aim/mmu_oea64.h	Fri Feb 20 05:17:24 2015	(r279040)
@@ -40,8 +40,6 @@ extern mmu_def_t oea64_mmu;
 vm_offset_t	moea64_bootstrap_alloc(vm_size_t, u_int);
 /* Set an LPTE structure to match the contents of a PVO */
 void	moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte);
-/* Synchronize reference/changed bits of a given PTE with the VM system. */
-void	moea64_sync_refchg(uint64_t lpte_lo);
 
 /*
  * Flags

Modified: user/nwhitehorn/ppc64-pmap-rework/aim/moea64_if.m
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/aim/moea64_if.m	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/aim/moea64_if.m	Fri Feb 20 05:17:24 2015	(r279040)
@@ -44,6 +44,20 @@
 
 INTERFACE moea64;
 
+CODE {
+	static moea64_pte_replace_t moea64_pte_replace_default;
+
+	static int64_t moea64_pte_replace_default(mmu_t mmu,
+	    struct pvo_entry *pvo, int flags)
+	{
+		int64_t refchg;
+
+		refchg = MOEA64_PTE_UNSET(mmu, pvo);
+		MOEA64_PTE_INSERT(mmu, pvo);
+
+		return (refchg);
+	}
+}
 
 /**
  * Return ref/changed bits from PTE referenced by _pvo if _pvo is currently in
@@ -66,11 +80,10 @@ METHOD int64_t pte_clear {
 };
 
 /**
- * Invalidate the PTE referenced by _pvo, synchronizing its validity
- * and ref/changed bits after completion to the backing page.
- * Does nothing if PTE not currently present in page table.
+ * Invalidate the PTE referenced by _pvo, returning its ref/changed bits.
+ * Returns -1 if PTE not currently present in page table.
  */
-METHOD void pte_unset {
+METHOD int64_t pte_unset {
 	mmu_t		_mmu;
 	struct pvo_entry *_pvo;
 };
@@ -79,7 +92,7 @@ METHOD void pte_unset {
  * Update the reference PTE to correspond to the contents of _pvo. Has the
  * same ref/changed semantics as pte_unset() (and should clear R/C bits). May
  * change the PVO's location in the page table or return with it unmapped if
- * PVO_WIRED is not set.
+ * PVO_WIRED is not set. By default, does unset() followed by insert().
  * 
  * _flags is a bitmask describing what level of page invalidation should occur:
  *   0 means no invalidation is required
@@ -87,11 +100,11 @@ METHOD void pte_unset {
  *   MOEA64_PTE_INVALIDATE requires an invalidation of the same strength as
  *    pte_unset() followed by pte_insert() 
  */
-METHOD void pte_replace {
+METHOD int64_t pte_replace {
 	mmu_t		_mmu;
 	struct pvo_entry *_pvo;
 	int		_flags;
-};
+} DEFAULT moea64_pte_replace_default;
 
 /**
  * Insert a PTE corresponding to _pvo into the page table, returning any errors

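With pte_replace() now carrying a DEFAULT implementation in moea64_if.m
(unset followed by insert, as added above), a backend only has to supply the
primitive synch/clear/unset/insert methods and inherits the rest. A minimal
method table for a hypothetical backend (the newmmu_* names are placeholders,
not part of this change) would look like:

	static mmu_method_t newmmu_methods[] = {
		MMUMETHOD(mmu_bootstrap,	newmmu_bootstrap),
		MMUMETHOD(moea64_pte_synch,	newmmu_pte_synch),
		MMUMETHOD(moea64_pte_clear,	newmmu_pte_clear),
		MMUMETHOD(moea64_pte_unset,	newmmu_pte_unset),
		MMUMETHOD(moea64_pte_insert,	newmmu_pte_insert),
		/* No moea64_pte_replace entry: the unset+insert default applies. */
		{ 0, 0 }
	};

	MMU_DEF_INHERIT(new_mmu, "mmu_new", newmmu_methods, 0, oea64_mmu);
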
Modified: user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c	Fri Feb 20 05:17:24 2015	(r279040)
@@ -190,8 +190,8 @@ static struct rwlock moea64_eviction_loc
 static int	moea64_pte_insert_native(mmu_t, struct pvo_entry *);
 static int64_t	moea64_pte_synch_native(mmu_t, struct pvo_entry *);
 static int64_t	moea64_pte_clear_native(mmu_t, struct pvo_entry *, uint64_t);
-static void	moea64_pte_replace_native(mmu_t, struct pvo_entry *, int);
-static void	moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *);
+static int64_t	moea64_pte_replace_native(mmu_t, struct pvo_entry *, int);
+static int64_t	moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *);
 
 /*
  * Utility routines.
@@ -312,7 +312,7 @@ moea64_pte_set_native(struct lpte *pvo_p
 	return (0);
 }
 
-static void
+static int64_t
 moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo)
 {
 	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
@@ -328,7 +328,7 @@ moea64_pte_unset_native(mmu_t mmu, struc
 	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
 		/* Evicted */
 		rw_runlock(&moea64_eviction_lock);
-		return;
+		return (-1);
 	}
 
 	/*
@@ -343,16 +343,13 @@ moea64_pte_unset_native(mmu_t mmu, struc
 	*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */
 	critical_exit();
 
-	/*
-	 * Save the ref & chg bits.
-	 */
-	moea64_sync_refchg(ptelo);
-
 	/* Keep statistics */
 	moea64_pte_valid--;
+
+	return (ptelo & (LPTE_CHG | LPTE_REF));
 }
 
-static void
+static int64_t
 moea64_pte_replace_native(mmu_t mmu, struct pvo_entry *pvo, int flags)
 {
 	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
@@ -367,17 +364,10 @@ moea64_pte_replace_native(mmu_t mmu, str
 	result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1,
 	    properpt.pte_hi & 0xffffffff, (properpt.pte_hi | LPTE_LOCKED)
 	     & 0xffffffff);
-	if (!result)
-		return;		/* Page being evicted or already evicted */
 
-	if (flags == 0) {
+	if (result && flags == 0) {
 		/* Just some software bits changing. The order in which this is
 		 * done is important on 32-bit systems. */
-		result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1,
-		    properpt.pte_hi & 0xffffffff,
-		    (properpt.pte_hi | LPTE_LOCKED) & 0xffffffff);
-		if (!result)
-			return;	/* Page being evicted or already evicted */
 		*(volatile int32_t *)(&pt->pte_hi) = properpt.pte_hi >> 32;
 		*(volatile int32_t *)(&pt->pte_lo) = properpt.pte_lo >> 32;
 		pt->pte_lo = properpt.pte_lo & 0xffffffff;
@@ -386,16 +376,19 @@ moea64_pte_replace_native(mmu_t mmu, str
 		EIEIO();
 		*((volatile int32_t *)(&pt->pte_hi) + 1) =
 		    properpt.pte_hi & 0xffffffff;
-		moea64_sync_refchg(properpt.pte_lo | lowbits);
-		return;
+		return (lowbits & (LPTE_REF | LPTE_CHG));
 	}
 
 	/*
 	 * Anything else requires invalidation and reinsertion, so just do that
 	 */
 
-	moea64_pte_unset_native(mmu, pvo);
+	lowbits = -1;
+	if (result)
+		lowbits = moea64_pte_unset_native(mmu, pvo);
 	moea64_pte_insert_native(mmu, pvo);
+
+	return (lowbits);
 }
 
 static void
@@ -650,9 +643,6 @@ moea64_pte_insert_native(mmu_t mmu, stru
 	    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT));
 	critical_exit();
 	
-	/* Save ref/changed from old page */
-	moea64_sync_refchg(be64toh(pt->pte_lo));
-
 	/* New translation */
 	pt->pte_lo = insertpt.pte_lo;
 	EIEIO();

Modified: user/nwhitehorn/ppc64-pmap-rework/conf/GENERIC64
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/conf/GENERIC64	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/conf/GENERIC64	Fri Feb 20 05:17:24 2015	(r279040)
@@ -28,7 +28,7 @@ makeoptions	WITH_CTF=1
 
 # Platform support
 options 	POWERMAC		#NewWorld Apple PowerMacs
-#options 	PS3			#Sony Playstation 3
+options 	PS3			#Sony Playstation 3
 options 	MAMBO			#IBM Mambo Full System Simulator
 options 	PSERIES			#PAPR-compliant systems (e.g. IBM p)
 

Modified: user/nwhitehorn/ppc64-pmap-rework/ps3/mmu_ps3.c
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/ps3/mmu_ps3.c	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/ps3/mmu_ps3.c	Fri Feb 20 05:17:24 2015	(r279040)
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/rwlock.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
@@ -67,15 +68,10 @@ static uint64_t mps3_vas_id;
 static void	mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart,
 		    vm_offset_t kernelend);
 static void	mps3_cpu_bootstrap(mmu_t mmup, int ap);
-static void	mps3_pte_synch(mmu_t, uintptr_t pt, struct lpte *pvo_pt);
-static void	mps3_pte_clear(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-		    uint64_t vpn, uint64_t ptebit);
-static void	mps3_pte_unset(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-		    uint64_t vpn);
-static void	mps3_pte_change(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-		    uint64_t vpn);
-static int	mps3_pte_insert(mmu_t, u_int ptegidx, struct lpte *pvo_pt);
-static uintptr_t mps3_pvo_to_pte(mmu_t, const struct pvo_entry *pvo);
+static int64_t	mps3_pte_synch(mmu_t, struct pvo_entry *);
+static int64_t	mps3_pte_clear(mmu_t, struct pvo_entry *, uint64_t ptebit);
+static int64_t	mps3_pte_unset(mmu_t, struct pvo_entry *);
+static int	mps3_pte_insert(mmu_t, struct pvo_entry *);
 
 
 static mmu_method_t mps3_methods[] = {
@@ -85,20 +81,22 @@ static mmu_method_t mps3_methods[] = {
 	MMUMETHOD(moea64_pte_synch,	mps3_pte_synch),
 	MMUMETHOD(moea64_pte_clear,	mps3_pte_clear),
 	MMUMETHOD(moea64_pte_unset,	mps3_pte_unset),
-	MMUMETHOD(moea64_pte_change,	mps3_pte_change),
 	MMUMETHOD(moea64_pte_insert,	mps3_pte_insert),
-	MMUMETHOD(moea64_pvo_to_pte,	mps3_pvo_to_pte),
 
         { 0, 0 }
 };
 
 MMU_DEF_INHERIT(ps3_mmu, "mmu_ps3", mps3_methods, 0, oea64_mmu);
 
+static struct rwlock mps3_eviction_lock;
+
 static void
 mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
 {
 	uint64_t final_pteg_count;
 
+	rw_init(&mps3_eviction_lock, "pte eviction");
+
 	moea64_early_bootstrap(mmup, kernelstart, kernelend);
 
 	lv1_construct_virtual_address_space(
@@ -151,72 +149,94 @@ mps3_cpu_bootstrap(mmu_t mmup, int ap)
 	}
 }
 
-static void
-mps3_pte_synch(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt)
+static int64_t
+mps3_pte_synch(mmu_t mmu, struct pvo_entry *pvo)
 {
 	uint64_t halfbucket[4], rcbits;
 	
 	PTESYNC();
-	lv1_read_htab_entries(mps3_vas_id, slot & ~0x3UL, &halfbucket[0],
-	    &halfbucket[1], &halfbucket[2], &halfbucket[3], &rcbits);
+	lv1_read_htab_entries(mps3_vas_id, pvo->pvo_pte.slot & ~0x3UL,
+	    &halfbucket[0], &halfbucket[1], &halfbucket[2], &halfbucket[3],
+	    &rcbits);
+
+	/* Check if present in page table */
+	if ((halfbucket[pvo->pvo_pte.slot & 0x3] & LPTE_AVPN_MASK) !=
+	    ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
+	    LPTE_AVPN_MASK))
+		return (-1);
+	if (!(halfbucket[pvo->pvo_pte.slot & 0x3] & LPTE_VALID))
+		return (-1);
 
 	/*
 	 * rcbits contains the low 12 bits of each PTEs 2nd part,
 	 * spaced at 16-bit intervals
 	 */
 
-	KASSERT((halfbucket[slot & 0x3] & LPTE_AVPN_MASK) ==
-	    (pvo_pt->pte_hi & LPTE_AVPN_MASK),
-	    ("PTE upper word %#lx != %#lx\n",
-	    halfbucket[slot & 0x3], pvo_pt->pte_hi));
-
- 	pvo_pt->pte_lo |= (rcbits >> ((3 - (slot & 0x3))*16)) &
-	    (LPTE_CHG | LPTE_REF);
+	return ((rcbits >> ((3 - (pvo->pvo_pte.slot & 0x3))*16)) &
+	    (LPTE_CHG | LPTE_REF));
 }
 
-static void
-mps3_pte_clear(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn,
-    u_int64_t ptebit)
+static int64_t
+mps3_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit)
 {
+	int64_t refchg;
+	struct lpte pte;
+
+	rw_rlock(&mps3_eviction_lock);
+
+	refchg = mps3_pte_synch(mmu, pvo);
+        if (refchg < 0) {
+                rw_runlock(&mps3_eviction_lock);
+                return (refchg);
+        }
 
-	lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi,
-	    pvo_pt->pte_lo & ~ptebit);
+	moea64_pte_from_pvo(pvo, &pte);
+
+	pte.pte_lo |= refchg;
+	pte.pte_lo &= ~ptebit;
+	lv1_write_htab_entry(mps3_vas_id, pvo->pvo_pte.slot, pte.pte_hi,
+	    pte.pte_lo);
+	rw_runlock(&mps3_eviction_lock);
+
+	return (refchg);
 }
 
-static void
-mps3_pte_unset(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn)
+static int64_t
+mps3_pte_unset(mmu_t mmu, struct pvo_entry *pvo)
 {
+	int64_t refchg;
 
-	mps3_pte_synch(mmu, slot, pvo_pt);
-	pvo_pt->pte_hi &= ~LPTE_VALID;
-	lv1_write_htab_entry(mps3_vas_id, slot, 0, 0);
+	rw_rlock(&mps3_eviction_lock);
+	refchg = mps3_pte_synch(mmu, pvo);
+        if (refchg < 0) {
+                rw_runlock(&mps3_eviction_lock);
+                return (-1);
+        }
+	/* XXX: race on RC bits between unset and sync. Anything to do? */
+	lv1_write_htab_entry(mps3_vas_id, pvo->pvo_pte.slot, 0, 0);
+	rw_runlock(&mps3_eviction_lock);
 	moea64_pte_valid--;
-}
 
-static void
-mps3_pte_change(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn)
-{
- 
-	mps3_pte_synch(mmu, slot, pvo_pt);
-	lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi,
-	    pvo_pt->pte_lo);
+	return (refchg & (LPTE_REF | LPTE_CHG));
 }
 
 static int
-mps3_pte_insert(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt)
+mps3_pte_insert(mmu_t mmu, struct pvo_entry *pvo)
 {
 	int result;
-	struct lpte evicted;
-	struct pvo_entry *pvo;
+	struct lpte pte, evicted;
 	uint64_t index;
 
-	pvo_pt->pte_hi |= LPTE_VALID;
-	pvo_pt->pte_hi &= ~LPTE_HID;
+	pvo->pvo_pte.slot &= ~7UL;
+	pvo->pvo_vaddr &= ~PVO_HID;
+	moea64_pte_from_pvo(pvo, &pte);
 	evicted.pte_hi = 0;
 	PTESYNC();
-	result = lv1_insert_htab_entry(mps3_vas_id, ptegidx << 3,
-	    pvo_pt->pte_hi, pvo_pt->pte_lo, LPTE_LOCKED | LPTE_WIRED, 0,
+	rw_wlock(&mps3_eviction_lock);
+	result = lv1_insert_htab_entry(mps3_vas_id, pvo->pvo_pte.slot,
+	    pte.pte_hi, pte.pte_lo, LPTE_LOCKED | LPTE_WIRED, 0,
 	    &index, &evicted.pte_hi, &evicted.pte_lo);
+	rw_wunlock(&mps3_eviction_lock);
 
 	if (result != 0) {
 		/* No freeable slots in either PTEG? We're hosed. */
@@ -227,84 +247,19 @@ mps3_pte_insert(mmu_t mmu, u_int ptegidx
 	/*
 	 * See where we ended up.
 	 */
-	if (index >> 3 != ptegidx)
-		pvo_pt->pte_hi |= LPTE_HID;
+	if ((index & ~7UL) != pvo->pvo_pte.slot)
+		pvo->pvo_vaddr |= PVO_HID;
+	pvo->pvo_pte.slot = index;
 
 	moea64_pte_valid++;
 
-	if (!evicted.pte_hi)
-		return (index & 0x7);
-
-	/*
-	 * Synchronize the sacrifice PTE with its PVO, then mark both
-	 * invalid. The PVO will be reused when/if the VM system comes
-	 * here after a fault.
-	 */
-
-	ptegidx = index >> 3; /* Where the sacrifice PTE was found */
-	if (evicted.pte_hi & LPTE_HID)
-		ptegidx ^= moea64_pteg_mask; /* PTEs indexed by primary */
-
-	KASSERT((evicted.pte_hi & (LPTE_WIRED | LPTE_LOCKED)) == 0,
-	    ("Evicted a wired PTE"));
-
-	result = 0;
-	LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
-		if (!PVO_PTEGIDX_ISSET(pvo))
-			continue;
-
-		if (pvo->pvo_pte.lpte.pte_hi == (evicted.pte_hi | LPTE_VALID)) {
-			KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID,
-			    ("Invalid PVO for valid PTE!"));
-			pvo->pvo_pte.lpte.pte_hi &= ~LPTE_VALID;
-			pvo->pvo_pte.lpte.pte_lo |=
-			    evicted.pte_lo & (LPTE_REF | LPTE_CHG);
-			PVO_PTEGIDX_CLR(pvo);
-			moea64_pte_valid--;
-			moea64_pte_overflow++;
-			result = 1;
-			break;
-		}
+	if (evicted.pte_hi) {
+		KASSERT((evicted.pte_hi & (LPTE_WIRED | LPTE_LOCKED)) == 0,
+		    ("Evicted a wired PTE"));
+		moea64_pte_valid--;
+		moea64_pte_overflow++;
 	}
 
-	KASSERT(result == 1, ("PVO for sacrifice PTE not found"));
-
-	return (index & 0x7);
-}
-
-static __inline u_int
-va_to_pteg(uint64_t vsid, vm_offset_t addr, int large)
-{
-	uint64_t hash;
-	int shift;
-
-	shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT;
-	hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >>
-	    shift);
-	return (hash & moea64_pteg_mask);
-}
-
-uintptr_t
-mps3_pvo_to_pte(mmu_t mmu, const struct pvo_entry *pvo)
-{
-	uint64_t vsid;
-	u_int ptegidx;
-
-	/* If the PTEG index is not set, then there is no page table entry */
-	if (!PVO_PTEGIDX_ISSET(pvo))
-		return (-1);
-
-	vsid = PVO_VSID(pvo);
-	ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE);
-
-	/*
-	 * We can find the actual pte entry without searching by grabbing
-	 * the PTEG index from 3 unused bits in pvo_vaddr and by
-	 * noticing the HID bit.
-	 */
-	if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID)
-		ptegidx ^= moea64_pteg_mask;
-
-	return ((ptegidx << 3) | PVO_PTEGIDX_GET(pvo));
+	return (0);
 }
 

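For reference, the locking discipline the PS3 backend now uses against
concurrent eviction mirrors the pseries code: take the eviction lock,
re-check the slot via pte_synch() (which returns -1 once the entry is gone),
and only then issue the hypervisor call. Condensed from mps3_pte_unset()
above (a restatement for quick reference, not additional code):

	rw_rlock(&mps3_eviction_lock);
	refchg = mps3_pte_synch(mmu, pvo);	/* -1: already evicted */
	if (refchg < 0) {
		rw_runlock(&mps3_eviction_lock);
		return (-1);
	}
	lv1_write_htab_entry(mps3_vas_id, pvo->pvo_pte.slot, 0, 0);
	rw_runlock(&mps3_eviction_lock);
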
Modified: user/nwhitehorn/ppc64-pmap-rework/pseries/mmu_phyp.c
==============================================================================
--- user/nwhitehorn/ppc64-pmap-rework/pseries/mmu_phyp.c	Fri Feb 20 02:43:08 2015	(r279039)
+++ user/nwhitehorn/ppc64-pmap-rework/pseries/mmu_phyp.c	Fri Feb 20 05:17:24 2015	(r279040)
@@ -70,8 +70,7 @@ static void	mphyp_bootstrap(mmu_t mmup, 
 static void	mphyp_cpu_bootstrap(mmu_t mmup, int ap);
 static int64_t	mphyp_pte_synch(mmu_t, struct pvo_entry *pvo);
 static int64_t	mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit);
-static void	mphyp_pte_unset(mmu_t, struct pvo_entry *pvo);
-static void	mphyp_pte_replace(mmu_t, struct pvo_entry *pvo, int flags);
+static int64_t	mphyp_pte_unset(mmu_t, struct pvo_entry *pvo);
 static int	mphyp_pte_insert(mmu_t, struct pvo_entry *pvo);
 
 static mmu_method_t mphyp_methods[] = {
@@ -81,7 +80,6 @@ static mmu_method_t mphyp_methods[] = {
 	MMUMETHOD(moea64_pte_synch,     mphyp_pte_synch),
         MMUMETHOD(moea64_pte_clear,     mphyp_pte_clear),
         MMUMETHOD(moea64_pte_unset,     mphyp_pte_unset),
-        MMUMETHOD(moea64_pte_replace,   mphyp_pte_replace),
         MMUMETHOD(moea64_pte_insert,    mphyp_pte_insert),
 
 	/* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */
@@ -91,23 +89,6 @@ static mmu_method_t mphyp_methods[] = {
 
 MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu);
 
-static int brokenkvm = 0;
-
-static void
-print_kvm_bug_warning(void *data)
-{
-
-	if (brokenkvm)
-		printf("WARNING: Running on a broken hypervisor that does "
-		    "not support mandatory H_CLEAR_MOD and H_CLEAR_REF "
-		    "hypercalls. Performance will be suboptimal.\n");
-}
-
-SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
-    print_kvm_bug_warning, NULL);
-SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning,
-    NULL);
-
 static void
 mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
 {
@@ -200,10 +181,6 @@ mphyp_bootstrap(mmu_t mmup, vm_offset_t 
 
 	moea64_mid_bootstrap(mmup, kernelstart, kernelend);
 	moea64_late_bootstrap(mmup, kernelstart, kernelend);
-
-	/* Test for broken versions of KVM that don't conform to the spec */
-	if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION)
-		brokenkvm = 1;
 }
 
 static void
@@ -252,7 +229,6 @@ mphyp_pte_clear(mmu_t mmu, struct pvo_en
 {
 	int64_t refchg;
 	uint64_t ptelo, junk;
-	int err;
 
 	/*
 	 * This involves two steps (synch and clear) so we need the entry
@@ -270,28 +246,14 @@ mphyp_pte_clear(mmu_t mmu, struct pvo_en
 		return (refchg);
 	}
 
-	if (brokenkvm) {
-		/*
-		 * No way to clear either bit, which is total madness.
-		 * Pessimistically claim that, once modified, it stays so
-		 * forever and that it is never referenced.
-		 */
-		rw_runlock(&mphyp_eviction_lock);
-		return (refchg & ~LPTE_REF);
-	}
-
 	if (ptebit & LPTE_CHG) {
-		err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0,
-		    &ptelo, &junk, &junk);
-		KASSERT(err == H_SUCCESS,
-		    ("Error clearing page change bit: %d", err));
+		phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0, &ptelo,
+		    &junk, &junk);
 		refchg |= (ptelo & LPTE_CHG);
 	}
 	if (ptebit & LPTE_REF) {
-		err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0,
-		    &ptelo, &junk, &junk);
-		KASSERT(err == H_SUCCESS,
-		    ("Error clearing page reference bit: %d", err));
+		phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0, &ptelo,
+		    &junk, &junk);
 		refchg |= (ptelo & LPTE_REF);
 	}
 
@@ -300,7 +262,7 @@ mphyp_pte_clear(mmu_t mmu, struct pvo_en
 	return (refchg);
 }
 
-static void
+static int64_t
 mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo)
 {
 	struct lpte pte;
@@ -317,27 +279,12 @@ mphyp_pte_unset(mmu_t mmu, struct pvo_en
 	KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
 	    ("Error removing page: %d", err));
 
-	if (err == H_SUCCESS)
-		moea64_sync_refchg((pvo->pvo_pte.pa & LPTE_RPGN) |
-		    (pte.pte_lo & (LPTE_REF | LPTE_CHG)));
-
-	if (err == H_NOT_FOUND)
+	if (err == H_NOT_FOUND) {
 		moea64_pte_overflow--;
-}
-
-static void
-mphyp_pte_replace(mmu_t mmu, struct pvo_entry *pvo, int flags)
-{
-
-	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+		return (-1);
+	}
 
-	/*
-	 * For anything other than a simple page protection update, we have to
-	 * reinsert the page. H_PROTECT is somehow slower than this, so just do
-	 * unset followed by insert unconditionally.
-	 */
-	mphyp_pte_unset(mmu, pvo);
-	mphyp_pte_insert(mmu, pvo);
+	return (pte.pte_lo & (LPTE_REF | LPTE_CHG));
 }
 
 static uintptr_t
@@ -352,7 +299,7 @@ mphyp_pte_spillable_ident(uintptr_t pteg
 	k = -1;
 	for (j = 0; j < 8; j++) {
 		slot = ptegbase + (i + j) % 8;
-		phyp_pft_hcall(H_READ, H_R_XLATE, slot, 0, 0, &pt.pte_hi,
+		phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
 		    &pt.pte_lo, &junk);
 		
 		if (pt.pte_hi & LPTE_WIRED)
@@ -362,7 +309,7 @@ mphyp_pte_spillable_ident(uintptr_t pteg
 		k = slot;
 
 		/* Try to get a page that has not been used lately */
-		if (!(pt.pte_lo & LPTE_REF)) {
+		if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) {
 			memcpy(to_evict, &pt, sizeof(struct lpte));
 			return (k);
 		}
@@ -371,7 +318,7 @@ mphyp_pte_spillable_ident(uintptr_t pteg
 	if (k == -1)
 		return (k);
 
-	phyp_pft_hcall(H_READ, H_R_XLATE, k, 0, 0, &to_evict->pte_hi,
+	phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi,
 	    &to_evict->pte_lo, &junk);
 	return (k);
 }
@@ -458,14 +405,6 @@ mphyp_pte_insert(mmu_t mmu, struct pvo_e
 		moea64_pte_overflow++;
 		KASSERT(result == H_SUCCESS,
 		    ("Error evicting page: %d", (int)result));
-
-		/*
-		 * Update VM system on final disposition of the old entry. The
-		 * RPN we get from H_REMOVE is not necessarily meaningful, so
-		 * combine the one from H_READ with the RC bits from H_REMOVE
-		 */
-		moea64_sync_refchg((evicted.pte_lo & LPTE_RPGN) | 
-		    (lastptelo & (LPTE_REF | LPTE_CHG)));
 	}
 
 	/*

