svn commit: r267857 - projects/amd64_xen_pv/sys/amd64/xen

Cherry G. Mathew cherry at FreeBSD.org
Wed Jun 25 08:55:21 UTC 2014


Author: cherry
Date: Wed Jun 25 08:55:20 2014
New Revision: 267857
URL: http://svnweb.freebsd.org/changeset/base/267857

Log:
  Towards single usermode with a direct-mapped (using 4K pages) Xen PV
  kernel (not complete).
  
  We also disable use of the pg_nx bit.
  
  Approved by:	gibbs (implicit)

Modified:
  projects/amd64_xen_pv/sys/amd64/xen/machdep.c
  projects/amd64_xen_pv/sys/amd64/xen/pmap.c
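
The bulk of the pmap.c changes below replace direct use of physical
addresses with pseudo-physical/machine translations: PTE contents are
built with phystomach() and decoded with machtophys(), and page-table
pages are looked up with MACH_TO_VM_PAGE() rather than
PHYS_TO_VM_PAGE().  The stand-alone sketch that follows is illustrative
only, assuming toy tables in place of Xen's phys-to-machine and
machine-to-phys maps; it is not the branch's implementation.

#include <stdio.h>

#define TOY_NPAGES	4
#define PAGE_SHIFT	12
#define PG_FRAME	0xfffffffffffff000UL

/* Toy PFN -> MFN table; the hypervisor maintains the real maps. */
static unsigned long p2m[TOY_NPAGES] = { 7, 3, 11, 2 };

static unsigned long
toy_phystomach(unsigned long pa)
{
	unsigned long pfn = pa >> PAGE_SHIFT;

	return ((p2m[pfn] << PAGE_SHIFT) | (pa & ~PG_FRAME));
}

static unsigned long
toy_machtophys(unsigned long ma)
{
	unsigned long mfn = ma >> PAGE_SHIFT, pfn;

	for (pfn = 0; pfn < TOY_NPAGES; pfn++)	/* real code indexes an M2P array */
		if (p2m[pfn] == mfn)
			break;
	return ((pfn << PAGE_SHIFT) | (ma & ~PG_FRAME));
}

int
main(void)
{
	unsigned long pa = (2UL << PAGE_SHIFT) | 0x10;	/* PFN 2, offset 0x10 */
	unsigned long ma = toy_phystomach(pa);

	printf("pa %#lx -> ma %#lx -> pa %#lx\n", pa, ma, toy_machtophys(ma));
	return (0);
}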

Modified: projects/amd64_xen_pv/sys/amd64/xen/machdep.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Wed Jun 25 08:51:16 2014	(r267856)
+++ projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Wed Jun 25 08:55:20 2014	(r267857)
@@ -621,6 +621,7 @@ initxen(struct start_info *si)
 
 	identify_cpu();		/* Final stage of CPU initialization */
 	initializecpu();
+	pg_nx = 0; /* XXX: Handle this properly for Xen PV */
 	initializecpucache();
 
 	init_param2(physmem);
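
pg_nx is normally probed during CPU initialization and OR'd into PTEs
that should not be executable; forcing it to 0 here simply makes that
OR a no-op until NX handling is sorted out for the Xen PV case.  A
minimal illustrative sketch of that effect follows (the constants and
names are assumptions for illustration, not the branch's code):

#include <stdio.h>

typedef unsigned long pt_entry_t;

#define X86_PG_V	0x001UL		/* valid */
#define X86_PG_RW	0x002UL		/* writable */
#define X86_PG_NX	(1UL << 63)	/* no-execute */
#define PAGE_SHIFT	12

static pt_entry_t pg_nx = 0;	/* would be X86_PG_NX on native CPUs with NXE */

/* Build a hypothetical data-page PTE; with pg_nx == 0 it stays executable. */
static pt_entry_t
toy_make_data_pte(unsigned long mfn)
{

	return ((mfn << PAGE_SHIFT) | X86_PG_V | X86_PG_RW | pg_nx);
}

int
main(void)
{
	pt_entry_t pte = toy_make_data_pte(7);

	printf("pte: %#lx (NX %s)\n", pte,
	    (pte & X86_PG_NX) ? "set" : "clear");
	return (0);
}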

Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Wed Jun 25 08:51:16 2014	(r267856)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Wed Jun 25 08:55:20 2014	(r267857)
@@ -338,7 +338,7 @@ struct pmap kernel_pmap_store;
 		      (va) <= DMAP_MAX_ADDRESS)
 #define ISKERNELVA(va) ((va) >= VM_MIN_KERNEL_ADDRESS && \
 			(va) <= VM_MAX_KERNEL_ADDRESS)
-#define ISBOOTVA(va) ((va) >= KERNBASE && (va) <= virtual_avail) /* XXX: keep an eye on virtual_avail */
+#define ISBOOTVA(va) ((va) >= KERNBASE && (va) <= (xenstack + 512 * 1024))
 
 uintptr_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 uintptr_t virtual_end;	/* VA of last avail page (end of kernel AS) */
@@ -348,7 +348,7 @@ int nkpt;
 static int ndmpdp;
 vm_paddr_t dmaplimit;
 uintptr_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
-pt_entry_t pg_nx = 0; /* XXX: probe for this ? */
+pt_entry_t pg_nx = 0; /* XXX: Correctly handle this for Xen PV */
 
 struct msgbuf *msgbufp = 0;
 
@@ -609,19 +609,24 @@ pmap_xen_setpages_ro(uintptr_t va, vm_si
 	vm_size_t i;
 	pt_entry_t PG_V;
 	uintptr_t tva;
-	vm_paddr_t ma;
+	vm_paddr_t pa, ma;
 
 	PG_V = pmap_valid_bit(kernel_pmap);
 
 	for (i = 0; i < npages; i++) {
 		tva = va + ptoa(i);
-		ma = phystomach(ISBOOTVA(tva) ? VTOP(tva) :
-				ISDMAPVA(tva) ? DMAP_TO_PHYS(tva) :
-				0);
-		KASSERT(ma != 0, ("%s: Unknown kernel va \n", __func__));
+		pa = ISBOOTVA(tva) ? VTOP(tva) :
+			ISDMAPVA(tva) ? DMAP_TO_PHYS(tva) :
+			ISKERNELVA(tva) ? pmap_kextract(tva) :
+			0;
+
+		KASSERT(pa != 0, ("%s: Unknown kernel va \n", __func__));
+
+		ma = phystomach(pa);
 
-		PT_SET_MA(va + PAGE_SIZE * i,
+		PT_SET_MA(tva,
 			  ma | PG_U | PG_V);
+
 	}
 }
 
@@ -646,7 +651,7 @@ pmap_xen_setpages_rw(uintptr_t va, vm_si
 		KASSERT(ma != 0, ("%s: Unknown kernel va \n", __func__));
 
 			      
-		PT_SET_MA(va + PAGE_SIZE * i, 
+		PT_SET_MA(tva,
 			  ma | PG_U | PG_V | PG_RW);
 	}
 }
@@ -1525,9 +1530,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 
 	while (pte < endpte) {
 		m = *ma++;
-#ifdef XXX
 		cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0);
-#endif
 		pa = VM_PAGE_TO_PHYS(m) | cache_bits;
 		if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != xpmap_ptom(pa)) {
 			oldpte |= *pte;
@@ -1574,6 +1577,9 @@ pmap_free_zero_pages(struct spglist *fre
 
 	while ((m = SLIST_FIRST(free)) != NULL) {
 		SLIST_REMOVE_HEAD(free, plinks.s.ss);
+
+		pmap_xen_setpages_rw(MACH_TO_DMAP(VM_PAGE_TO_MACH(m)), 1);
+
 		/* Preserve the page's PG_ZERO setting. */
 		vm_page_free_toq(m);
 	}
@@ -1683,14 +1689,14 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_
 		/* We just released a PT, unhold the matching PD */
 		vm_page_t pdpg;
 
-		pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
+		pdpg = MACH_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
 		pmap_unwire_ptp(pmap, va, pdpg, free);
 	}
 	if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
 		/* We just released a PD, unhold the matching PDP */
 		vm_page_t pdppg;
 
-		pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
+		pdppg = MACH_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
 		pmap_unwire_ptp(pmap, va, pdppg, free);
 	}
 
@@ -1722,7 +1728,7 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t v
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
-	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
+	mpte = MACH_TO_VM_PAGE(ptepde & PG_FRAME);
 	return (pmap_unwire_ptp(pmap, va, mpte, free));
 }
 
@@ -1752,6 +1758,7 @@ pmap_pinit(pmap_t pmap)
 	 * allocate the page directory page
 	 */
 	pmap->pm_pml4 = (void *) kmem_malloc(kernel_arena, PAGE_SIZE, M_ZERO);
+
 	if (pmap->pm_pml4 == NULL) return 0;
 
 	pmap->pm_cr3 = pmap_kextract_ma((vm_offset_t)pmap->pm_pml4);
@@ -1769,6 +1776,9 @@ pmap_pinit(pmap_t pmap)
 
 	pmap_xen_setpages_ro((uintptr_t)pmap->pm_pml4, 1);
 
+	/* Also mark DMAP alias r/o */
+	pmap_xen_setpages_ro(MACH_TO_DMAP(pmap->pm_cr3), 1);
+
 	xen_pgdir_pin(pmap->pm_cr3);
 
 	pmap->pm_root.rt_root = 0;
@@ -1829,6 +1839,8 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 		pmap_zero_page(m);
 	}
 
+	pmap_xen_setpages_ro(MACH_TO_DMAP(VM_PAGE_TO_MACH(m)), 1);
+
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
@@ -1859,6 +1871,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 			    lockp) == NULL) {
 				--m->wire_count;
 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+				pmap_xen_setpages_rw(MACH_TO_DMAP(VM_PAGE_TO_MACH(m)), 1);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
@@ -1892,6 +1905,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 			    lockp) == NULL) {
 				--m->wire_count;
 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+				pmap_xen_setpages_rw(MACH_TO_DMAP(VM_PAGE_TO_MACH(m)), 1);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
@@ -1907,6 +1921,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 					--m->wire_count;
 					atomic_subtract_int(&vm_cnt.v_wire_count,
 					    1);
+					pmap_xen_setpages_rw(MACH_TO_DMAP(VM_PAGE_TO_MACH(m)), 1);
 					vm_page_free_zero(m);
 					return (NULL);
 				}
@@ -2033,6 +2048,7 @@ pmap_release(pmap_t pmap)
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
 	xen_pgdir_unpin(pmap->pm_cr3);
+	pmap_xen_setpages_rw(MACH_TO_DMAP(pmap->pm_cr3), 1);
 	pmap_xen_setpages_rw((uintptr_t)pmap->pm_pml4, 1);
 
 	bzero(pmap->pm_pml4, PAGE_SIZE);
@@ -2437,7 +2453,6 @@ retry:
 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 	dump_add_page(m->phys_addr);
 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
-	pmap_xen_setpages_rw((uintptr_t)pc, 1);
 	invlpg((vm_offset_t)pc);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
@@ -2565,7 +2580,6 @@ pmap_pvh_remove(struct md_page *pvh, pma
 
 void pmap_xen_userload(pmap_t pmap)
 {
-  	(void) pmap_allocpde; /* XXX: */
 	KASSERT(pmap != kernel_pmap, 
 		("Kernel pmap requested on user load.\n"));
 
@@ -2822,7 +2836,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_WLOCKED(m->object);
 	pa = VM_PAGE_TO_PHYS(m);
-	newpte = (pt_entry_t)(pa | PG_A | PG_V);
+	newpte = (pt_entry_t)(phystomach(pa) | PG_A | PG_V);
 	if ((access & VM_PROT_WRITE) != 0)
 		newpte |= PG_M;
 	if ((prot & VM_PROT_WRITE) != 0)
@@ -2835,8 +2849,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 		newpte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		newpte |= PG_U;
+	/* On xen this is a security hole  unless you know what you're doing
 	if (pmap == kernel_pmap)
 		newpte |= PG_G;
+	*/
+
 	newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0);
 
 	/*
@@ -2866,7 +2883,7 @@ retry:
 		/* XXX: PG_PS: pmap_demote_pde_locked(pmap, pde, va, &lock) */ {
 		pte = pmap_pde_to_pte(pde, va);
 		if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
-			mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+			mpte = MACH_TO_VM_PAGE(*pde & PG_FRAME);
 			mpte->wire_count++;
 		}
 	} else if (va < VM_MAXUSER_ADDRESS) {
@@ -2875,6 +2892,7 @@ retry:
 		 * deallocated.
 		 */
 		mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), &lock);
+		//panic(__func__);
 		goto retry;
 	} else
 		panic("pmap_enter: invalid page directory va=%#lx", va);
@@ -2909,7 +2927,7 @@ retry:
 		/*
 		 * Has the physical page changed?
 		 */
-		opa = origpte & PG_FRAME;
+		opa = machtophys(origpte & PG_FRAME);
 		if (opa == pa) {
 			/*
 			 * No, might be a protection or wiring change.
@@ -2952,7 +2970,7 @@ retry:
 	if ((origpte & PG_V) != 0) {
 validate:
 		origpte = pte_load_store(pte, newpte);
-		opa = origpte & PG_FRAME;
+		opa = machtophys(origpte & PG_FRAME);
 		if (opa != pa) {
 			if ((origpte & PG_MANAGED) != 0) {
 				om = PHYS_TO_VM_PAGE(opa);
@@ -2987,8 +3005,13 @@ validate:
 		}
 		if ((origpte & PG_A) != 0)
 			pmap_invalidate_page(pmap, va);
-	} else
+	} else {
 		pte_store(pte, newpte);
+		if (pmap != kernel_pmap) {
+		  pmap_xen_userload(pmap); /*XXX: Move to kernel (re) entry ? */
+		}
+		 
+	} /* XXX: remove braces */
 
 unchanged:
 
@@ -3129,7 +3152,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 			if (ptepa && (*ptepa & PG_V) != 0) {
 				if (*ptepa & PG_PS)
 					return (NULL);
-				mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
+				mpte = MACH_TO_VM_PAGE(*ptepa & PG_FRAME);
 				mpte->wire_count++;
 			} else {
 				/*
@@ -3183,10 +3206,12 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 	/*
 	 * Now validate mapping with RO protection
 	 */
-	if ((m->oflags & VPO_UNMANAGED) != 0)
-		pte_store(pte, pa | PG_V | PG_U);
-	else
-		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+	if ((m->oflags & VPO_UNMANAGED) != 0) {
+		pte_store(pte, phystomach(pa) | PG_V | PG_U);
+	}
+	else {
+		pte_store(pte, phystomach(pa) | PG_V | PG_U | PG_MANAGED);
+	}
 	return (mpte);
 }
 
@@ -3239,11 +3264,11 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t 
 		pmap_invalidate_page(kernel_pmap, sva);
 	pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
 	if (oldpde & PG_MANAGED) {
-		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
-		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
+		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, machtophys(oldpde & PG_PS_FRAME));
+		pvh = pa_to_pvh(machtophys(oldpde & PG_PS_FRAME));
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + NBPDR;
-		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
+		for (va = sva, m = MACH_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 		    va < eva; va += PAGE_SIZE, m++) {
 			if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
@@ -3294,7 +3319,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t 
 		pmap->pm_stats.wired_count -= 1;
 	pmap_resident_count_dec(pmap, 1);
 	if (oldpte & PG_MANAGED) {
-		m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
+		m = MACH_TO_VM_PAGE(oldpte & PG_FRAME);
 		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		if (oldpte & PG_A)
@@ -4008,9 +4033,9 @@ pmap_remove_pages(pmap_t pmap)
 				}
 
 				if (superpage)
-					pa = tpte & PG_PS_FRAME;
+					pa = machtophys(tpte & PG_PS_FRAME);
 				else
-					pa = tpte & PG_FRAME;
+					pa = machtophys(tpte & PG_FRAME);
 
 				m = PHYS_TO_VM_PAGE(pa);
 				KASSERT(m->phys_addr == pa,
@@ -4042,7 +4067,7 @@ pmap_remove_pages(pmap_t pmap)
 				pc->pc_map[field] |= bitmask;
 				if (superpage) {
 					pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
-					pvh = pa_to_pvh(tpte & PG_PS_FRAME);
+					pvh = pa_to_pvh(machtophys(tpte & PG_PS_FRAME));
 					TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 					pvh->pv_gen++;
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
@@ -4210,17 +4235,23 @@ pmap_is_referenced(vm_page_t m)
  *	Return whether or not the specified virtual address is elgible
  *	for prefault.
  */
-
-/* 
- * XXX: I've just duplicated what native does here. I *think*, with
- * mmu_map.[ch] (which native doesn't have), addr is always
- * prefaultable. Research this.
- */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
-	return false; /* XXX */
+	pd_entry_t *pde;
+	pt_entry_t *pte, PG_V;
+	boolean_t rv;
+
+	PG_V = pmap_valid_bit(pmap);
+	rv = FALSE;
+	PMAP_LOCK(pmap);
+	pde = pmap_pde(pmap, addr);
+	if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) {
+		pte = pmap_pde_to_pte(pde, addr);
+		rv = (*pte & PG_V) == 0;
+	}
+	PMAP_UNLOCK(pmap);
+	return (rv);
 }
 
 /*
@@ -4231,9 +4262,142 @@ pmap_is_prefaultable(pmap_t pmap, vm_off
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
-	return;
+#ifdef LARGEFRAMES
+	struct rwlock *lock;
+#endif
+	pml4_entry_t *pml4e;
+	pdp_entry_t *pdpe;
+	pd_entry_t oldpde, *pde;
+	pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V;
+	vm_offset_t va_next;
+	vm_page_t m;
+	boolean_t anychanged, pv_lists_locked;
+
+	if (advice != MADV_DONTNEED && advice != MADV_FREE)
+		return;
+
+	/*
+	 * A/D bit emulation requires an alternate code path when clearing
+	 * the modified and accessed bits below. Since this function is
+	 * advisory in nature we skip it entirely for pmaps that require
+	 * A/D bit emulation.
+	 */
+	if (pmap_emulate_ad_bits(pmap))
+		return;
 
+	PG_A = pmap_accessed_bit(pmap);
+	PG_G = pmap_global_bit(pmap);
+	PG_M = pmap_modified_bit(pmap);
+	PG_V = pmap_valid_bit(pmap);
+	PG_RW = pmap_rw_bit(pmap);
+
+	pv_lists_locked = FALSE;
+#ifdef LARGEFRAMES
+resume:
+#endif 
+	anychanged = FALSE;
+	PMAP_LOCK(pmap);
+	for (; sva < eva; sva = va_next) {
+		pml4e = pmap_pml4e(pmap, sva);
+		if ((*pml4e & PG_V) == 0) {
+			va_next = (sva + NBPML4) & ~PML4MASK;
+			if (va_next < sva)
+				va_next = eva;
+			continue;
+		}
+		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+		if ((*pdpe & PG_V) == 0) {
+			va_next = (sva + NBPDP) & ~PDPMASK;
+			if (va_next < sva)
+				va_next = eva;
+			continue;
+		}
+		va_next = (sva + NBPDR) & ~PDRMASK;
+		if (va_next < sva)
+			va_next = eva;
+		pde = pmap_pdpe_to_pde(pdpe, sva);
+		oldpde = *pde;
+		if ((oldpde & PG_V) == 0)
+			continue;
+#ifdef LARGEFRAMES
+		else if ((oldpde & PG_PS) != 0) {
+			if ((oldpde & PG_MANAGED) == 0)
+				continue;
+			if (!pv_lists_locked) {
+				pv_lists_locked = TRUE;
+				if (!rw_try_rlock(&pvh_global_lock)) {
+					if (anychanged)
+						pmap_invalidate_all(pmap);
+					PMAP_UNLOCK(pmap);
+					rw_rlock(&pvh_global_lock);
+					goto resume;
+				}
+			}
+			lock = NULL;
+			if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
+				if (lock != NULL)
+					rw_wunlock(lock);
+
+				/*
+				 * The large page mapping was destroyed.
+				 */
+				continue;
+			}
+
+			/*
+			 * Unless the page mappings are wired, remove the
+			 * mapping to a single page so that a subsequent
+			 * access may repromote.  Since the underlying page
+			 * table page is fully populated, this removal never
+			 * frees a page table page.
+			 */
+			if ((oldpde & PG_W) == 0) {
+				pte = pmap_pde_to_pte(pde, sva);
+				KASSERT((*pte & PG_V) != 0,
+				    ("pmap_advise: invalid PTE"));
+				pmap_remove_pte(pmap, pte, sva, *pde, NULL,
+				    &lock);
+				anychanged = TRUE;
+			}
+			if (lock != NULL)
+				rw_wunlock(lock);
+		}
+#endif /* LARGEFRAMES */
+		if (va_next > eva)
+			va_next = eva;
+		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+		    sva += PAGE_SIZE) {
+			if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+			    PG_V))
+				continue;
+			else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+				if (advice == MADV_DONTNEED) {
+					/*
+					 * Future calls to pmap_is_modified()
+					 * can be avoided by making the page
+					 * dirty now.
+					 */
+					m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+					vm_page_dirty(m);
+				}
+				/* Xen pte updates are atomic */
+				pte_store(pte, *pte & ~(PG_M | PG_A));
+			} else if ((*pte & PG_A) != 0)
+				/* Xen pte updates are atomic */
+				pte_store(pte, *pte & ~PG_A);
+			else
+				continue;
+			if ((*pte & PG_G) != 0)
+				pmap_invalidate_page(pmap, sva);
+			else
+				anychanged = TRUE;
+		}
+	}
+	if (anychanged)
+		pmap_invalidate_all(pmap);
+	if (pv_lists_locked)
+		rw_runlock(&pvh_global_lock);
+	PMAP_UNLOCK(pmap);
 }
 
 void
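
Several of the hunks above also pair pmap_xen_setpages_ro() on the
DMAP alias of a page when it starts life as a page-table page with a
matching pmap_xen_setpages_rw() before the page is unpinned, freed, or
recycled: Xen PV rejects writable mappings of live page-table frames
in any address space, the direct map included.  Below is a minimal
sketch of that pairing, with stand-in helpers that only log what the
real calls would do (assumed names, not the branch's code):

#include <stdio.h>

static void set_ro(const char *what) { printf("mark %s read-only\n", what); }
static void set_rw(const char *what) { printf("mark %s read/write\n", what); }

/* Roughly the order pmap_pinit() uses for the new PML4 page. */
static void
toy_pinit(void)
{

	set_ro("pml4 (kernel VA mapping)");
	set_ro("pml4 (DMAP alias)");
	printf("xen_pgdir_pin()\n");
}

/* Roughly the order pmap_release() uses, i.e. the reverse. */
static void
toy_release(void)
{

	printf("xen_pgdir_unpin()\n");
	set_rw("pml4 (DMAP alias)");
	set_rw("pml4 (kernel VA mapping)");
}

int
main(void)
{

	toy_pinit();
	toy_release();
	return (0);
}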

